intra_est.cpp revision 29a84457aed4c45bc900998b5e11c03023264208
1/* ------------------------------------------------------------------
2 * Copyright (C) 1998-2009 PacketVideo
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
13 * express or implied.
14 * See the License for the specific language governing permissions
15 * and limitations under the License.
16 * -------------------------------------------------------------------
17 */
18#include "avcenc_lib.h"
19
#define TH_I4     0   /* threshold biasing toward I16 mode instead of I4 mode */
#define TH_Intra  0   /* threshold biasing toward INTER mode instead of intra mode */

/* Fixed mode selections (names suggest defaults used when mode search is
   bypassed -- TODO confirm where these are consumed). */
#define FIXED_INTRAPRED_MODE     AVC_I16
#define FIXED_I16_MODE           AVC_I16_DC
#define FIXED_I4_MODE            AVC_I4_Diagonal_Down_Left
#define FIXED_INTRA_CHROMA_MODE  AVC_IC_DC

/* Clamp the integer lvalue x to [0, 255] in place.
 * When x, viewed as unsigned, exceeds 0xFF it is replaced by
 * 0xFF & ~(x >> 31): 0 for a negative x (with an arithmetic right shift),
 * 0xFF otherwise.  The expansion is a complete if-statement, so it is used
 * without a trailing semicolon.  x is evaluated more than once. */
#define CLIP_RESULT(x)      if ((uint)(x) > 0xFF) { \
                 (x) = 0xFF & (~((x) >> 31)); }
30
31
32bool IntraDecisionABE(AVCEncObject *encvid, int min_cost, uint8 *curL, int picPitch)
33{
34    AVCCommonObj *video = encvid->common;
35    AVCFrameIO *currInput = encvid->currInput;
36    int orgPitch = currInput->pitch;
37    int x_pos = (video->mb_x) << 4;
38    int y_pos = (video->mb_y) << 4;
39    uint8 *orgY = currInput->YCbCr[0] + y_pos * orgPitch + x_pos;
40    int j;
41    uint8 *topL, *leftL, *orgY_2, *orgY_3;
42    int temp, SBE, offset;
43    OsclFloat ABE;
44    bool intra = true;
45
46    if (((x_pos >> 4) != (int)video->PicWidthInMbs - 1) &&
47            ((y_pos >> 4) != (int)video->PicHeightInMbs - 1) &&
48            video->intraAvailA &&
49            video->intraAvailB)
50    {
51        SBE = 0;
52        /* top neighbor */
53        topL = curL - picPitch;
54        /* left neighbor */
55        leftL = curL - 1;
56        orgY_2 = orgY - orgPitch;
57
58        for (j = 0; j < 16; j++)
59        {
60            temp = *topL++ - orgY[j];
61            SBE += ((temp >= 0) ? temp : -temp);
62            temp = *(leftL += picPitch) - *(orgY_2 += orgPitch);
63            SBE += ((temp >= 0) ? temp : -temp);
64        }
65
66        /* calculate chroma */
67        offset = (y_pos >> 2) * picPitch + (x_pos >> 1);
68        topL = video->currPic->Scb + offset;
69        orgY_2 = currInput->YCbCr[1] + offset + (y_pos >> 2) * (orgPitch - picPitch);
70
71        leftL = topL - 1;
72        topL -= (picPitch >> 1);
73        orgY_3 = orgY_2 - (orgPitch >> 1);
74        for (j = 0; j < 8; j++)
75        {
76            temp = *topL++ - orgY_2[j];
77            SBE += ((temp >= 0) ? temp : -temp);
78            temp = *(leftL += (picPitch >> 1)) - *(orgY_3 += (orgPitch >> 1));
79            SBE += ((temp >= 0) ? temp : -temp);
80        }
81
82        topL = video->currPic->Scr + offset;
83        orgY_2 = currInput->YCbCr[2] + offset + (y_pos >> 2) * (orgPitch - picPitch);
84
85        leftL = topL - 1;
86        topL -= (picPitch >> 1);
87        orgY_3 = orgY_2 - (orgPitch >> 1);
88        for (j = 0; j < 8; j++)
89        {
90            temp = *topL++ - orgY_2[j];
91            SBE += ((temp >= 0) ? temp : -temp);
92            temp = *(leftL += (picPitch >> 1)) - *(orgY_3 += (orgPitch >> 1));
93            SBE += ((temp >= 0) ? temp : -temp);
94        }
95
96        /* compare mincost/384 and SBE/64 */
97        ABE = SBE / 64.0;
98        if (ABE*0.8 >= min_cost / 384.0)
99        {
100            intra = false;
101        }
102    }
103
104    return intra;
105}
106
107/* perform searching for MB mode */
108/* assuming that this is done inside the encoding loop,
109no need to call InitNeighborAvailability */
110
111void MBIntraSearch(AVCEncObject *encvid, int mbnum, uint8 *curL, int picPitch)
112{
113    AVCCommonObj *video = encvid->common;
114    AVCFrameIO *currInput = encvid->currInput;
115    AVCMacroblock *currMB = video->currMB;
116    int min_cost;
117    uint8 *orgY;
118    int x_pos = (video->mb_x) << 4;
119    int y_pos = (video->mb_y) << 4;
120    uint32 *saved_inter;
121    int j;
122    int orgPitch = currInput->pitch;
123    bool intra = true;
124
125    currMB->CBP = 0;
126
127    /* first do motion vector and variable block size search */
128    min_cost = encvid->min_cost[mbnum];
129
130    /* now perform intra prediction search */
131    /* need to add the check for encvid->intraSearch[video->mbNum] to skip intra
132       if it's not worth checking. */
133    if (video->slice_type == AVC_P_SLICE)
134    {
135        /* Decide whether intra search is necessary or not */
136        /* This one, we do it in the encoding loop so the neighboring pixel are the
137        actual reconstructed pixels. */
138        intra = IntraDecisionABE(encvid, min_cost, curL, picPitch);
139    }
140
141    if (intra == true || video->slice_type == AVC_I_SLICE)
142    {
143        orgY = currInput->YCbCr[0] + y_pos * orgPitch + x_pos;
144
145        /* i16 mode search */
146        /* generate all the predictions */
147        intrapred_luma_16x16(encvid);
148
149        /* evaluate them one by one */
150        find_cost_16x16(encvid, orgY, &min_cost);
151
152        if (video->slice_type == AVC_P_SLICE)
153        {
154            /* save current inter prediction */
155            saved_inter = encvid->subpel_pred; /* reuse existing buffer */
156            j = 16;
157            curL -= 4;
158            picPitch -= 16;
159            while (j--)
160            {
161                *saved_inter++ = *((uint32*)(curL += 4));
162                *saved_inter++ = *((uint32*)(curL += 4));
163                *saved_inter++ = *((uint32*)(curL += 4));
164                *saved_inter++ = *((uint32*)(curL += 4));
165                curL += picPitch;
166            }
167
168        }
169
170        /* i4 mode search */
171        mb_intra4x4_search(encvid, &min_cost);
172
173        encvid->min_cost[mbnum] = min_cost; /* update min_cost */
174    }
175
176
177    if (currMB->mb_intra)
178    {
179        chroma_intra_search(encvid);
180
181        /* need to set this in order for the MBInterPrediction to work!! */
182        memset(currMB->mvL0, 0, sizeof(int32)*16);
183        currMB->ref_idx_L0[0] = currMB->ref_idx_L0[1] =
184                                    currMB->ref_idx_L0[2] = currMB->ref_idx_L0[3] = -1;
185    }
186    else if (video->slice_type == AVC_P_SLICE && intra == true)
187    {
188        /* restore current inter prediction */
189        saved_inter = encvid->subpel_pred; /* reuse existing buffer */
190        j = 16;
191        curL -= ((picPitch + 16) << 4);
192        while (j--)
193        {
194            *((uint32*)(curL += 4)) = *saved_inter++;
195            *((uint32*)(curL += 4)) = *saved_inter++;
196            *((uint32*)(curL += 4)) = *saved_inter++;
197            *((uint32*)(curL += 4)) = *saved_inter++;
198            curL += picPitch;
199        }
200    }
201
202    return ;
203}
204
205/* generate all the prediction values */
206void intrapred_luma_16x16(AVCEncObject *encvid)
207{
208    AVCCommonObj *video = encvid->common;
209    AVCPictureData *currPic = video->currPic;
210
211    int x_pos = (video->mb_x) << 4;
212    int y_pos = (video->mb_y) << 4;
213    int pitch = currPic->pitch;
214
215    int offset = y_pos * pitch + x_pos;
216
217    uint8 *pred, *top, *left;
218    uint8 *curL = currPic->Sl + offset; /* point to reconstructed frame */
219    uint32 word1, word2, word3, word4;
220    uint32 sum = 0;
221
222    int a_16, b, c, factor_c;
223    uint8 *comp_ref_x0, *comp_ref_x1, *comp_ref_y0, *comp_ref_y1;
224    int H = 0, V = 0, tmp, value;
225    int i;
226
227    if (video->intraAvailB)
228    {
229        //get vertical prediction mode
230        top = curL - pitch;
231
232        pred = encvid->pred_i16[AVC_I16_Vertical] - 16;
233
234        word1 = *((uint32*)(top));  /* read 4 bytes from top */
235        word2 = *((uint32*)(top + 4)); /* read 4 bytes from top */
236        word3 = *((uint32*)(top + 8)); /* read 4 bytes from top */
237        word4 = *((uint32*)(top + 12)); /* read 4 bytes from top */
238
239        for (i = 0; i < 16; i++)
240        {
241            *((uint32*)(pred += 16)) = word1;
242            *((uint32*)(pred + 4)) = word2;
243            *((uint32*)(pred + 8)) = word3;
244            *((uint32*)(pred + 12)) = word4;
245
246        }
247
248        sum = word1 & 0xFF00FF;
249        word1 = (word1 >> 8) & 0xFF00FF;
250        sum += word1;
251        word1 = (word2 & 0xFF00FF);
252        sum += word1;
253        word2 = (word2 >> 8) & 0xFF00FF;
254        sum += word2;
255        word1 = (word3 & 0xFF00FF);
256        sum += word1;
257        word3 = (word3 >> 8) & 0xFF00FF;
258        sum += word3;
259        word1 = (word4 & 0xFF00FF);
260        sum += word1;
261        word4 = (word4 >> 8) & 0xFF00FF;
262        sum += word4;
263
264        sum += (sum >> 16);
265        sum &= 0xFFFF;
266
267        if (!video->intraAvailA)
268        {
269            sum = (sum + 8) >> 4;
270        }
271    }
272
273    if (video->intraAvailA)
274    {
275        // get horizontal mode
276        left = curL - 1 - pitch;
277
278        pred = encvid->pred_i16[AVC_I16_Horizontal] - 16;
279
280        for (i = 0; i < 16; i++)
281        {
282            word1 = *(left += pitch);
283            sum += word1;
284
285            word1 = (word1 << 8) | word1;
286            word1 = (word1 << 16) | word1; /* make it 4 */
287
288            *(uint32*)(pred += 16) = word1;
289            *(uint32*)(pred + 4) = word1;
290            *(uint32*)(pred + 8) = word1;
291            *(uint32*)(pred + 12) = word1;
292        }
293
294        if (!video->intraAvailB)
295        {
296            sum = (sum + 8) >> 4;
297        }
298        else
299        {
300            sum = (sum + 16) >> 5;
301        }
302    }
303
304    // get DC mode
305    if (!video->intraAvailA && !video->intraAvailB)
306    {
307        sum = 0x80808080;
308    }
309    else
310    {
311        sum = (sum << 8) | sum;
312        sum = (sum << 16) | sum;
313    }
314
315    pred = encvid->pred_i16[AVC_I16_DC] - 16;
316    for (i = 0; i < 16; i++)
317    {
318        *((uint32*)(pred += 16)) = sum;
319        *((uint32*)(pred + 4)) = sum;
320        *((uint32*)(pred + 8)) = sum;
321        *((uint32*)(pred + 12)) = sum;
322    }
323
324    // get plane mode
325    if (video->intraAvailA && video->intraAvailB && video->intraAvailD)
326    {
327        pred = encvid->pred_i16[AVC_I16_Plane] - 16;
328
329        comp_ref_x0 = curL - pitch + 8;
330        comp_ref_x1 = curL - pitch + 6;
331        comp_ref_y0 = curL - 1 + (pitch << 3);
332        comp_ref_y1 = curL - 1 + 6 * pitch;
333
334        for (i = 1; i < 8; i++)
335        {
336            H += i * (*comp_ref_x0++ - *comp_ref_x1--);
337            V += i * (*comp_ref_y0 - *comp_ref_y1);
338            comp_ref_y0 += pitch;
339            comp_ref_y1 -= pitch;
340        }
341
342        H += i * (*comp_ref_x0++ - curL[-pitch-1]);
343        V += i * (*comp_ref_y0 - *comp_ref_y1);
344
345
346        a_16 = ((*(curL - pitch + 15) + *(curL - 1 + 15 * pitch)) << 4) + 16;;
347        b = (5 * H + 32) >> 6;
348        c = (5 * V + 32) >> 6;
349
350        tmp = 0;
351        for (i = 0; i < 16; i++)
352        {
353            factor_c = a_16 + c * (tmp++ - 7);
354            factor_c -= 7 * b;
355
356            value = factor_c >> 5;
357            factor_c += b;
358            CLIP_RESULT(value)
359            word1 = value;
360            value = factor_c >> 5;
361            factor_c += b;
362            CLIP_RESULT(value)
363            word1 = (word1) | (value << 8);
364            value = factor_c >> 5;
365            factor_c += b;
366            CLIP_RESULT(value)
367            word1 = (word1) | (value << 16);
368            value = factor_c >> 5;
369            factor_c += b;
370            CLIP_RESULT(value)
371            word1 = (word1) | (value << 24);
372            *((uint32*)(pred += 16)) = word1;
373            value = factor_c >> 5;
374            factor_c += b;
375            CLIP_RESULT(value)
376            word1 = value;
377            value = factor_c >> 5;
378            factor_c += b;
379            CLIP_RESULT(value)
380            word1 = (word1) | (value << 8);
381            value = factor_c >> 5;
382            factor_c += b;
383            CLIP_RESULT(value)
384            word1 = (word1) | (value << 16);
385            value = factor_c >> 5;
386            factor_c += b;
387            CLIP_RESULT(value)
388            word1 = (word1) | (value << 24);
389            *((uint32*)(pred + 4)) = word1;
390            value = factor_c >> 5;
391            factor_c += b;
392            CLIP_RESULT(value)
393            word1 = value;
394            value = factor_c >> 5;
395            factor_c += b;
396            CLIP_RESULT(value)
397            word1 = (word1) | (value << 8);
398            value = factor_c >> 5;
399            factor_c += b;
400            CLIP_RESULT(value)
401            word1 = (word1) | (value << 16);
402            value = factor_c >> 5;
403            factor_c += b;
404            CLIP_RESULT(value)
405            word1 = (word1) | (value << 24);
406            *((uint32*)(pred + 8)) = word1;
407            value = factor_c >> 5;
408            factor_c += b;
409            CLIP_RESULT(value)
410            word1 = value;
411            value = factor_c >> 5;
412            factor_c += b;
413            CLIP_RESULT(value)
414            word1 = (word1) | (value << 8);
415            value = factor_c >> 5;
416            factor_c += b;
417            CLIP_RESULT(value)
418            word1 = (word1) | (value << 16);
419            value = factor_c >> 5;
420            CLIP_RESULT(value)
421            word1 = (word1) | (value << 24);
422            *((uint32*)(pred + 12)) = word1;
423        }
424    }
425
426    return ;
427}
428
429
430/* evaluate each prediction mode of I16 */
431void find_cost_16x16(AVCEncObject *encvid, uint8 *orgY, int *min_cost)
432{
433    AVCCommonObj *video = encvid->common;
434    AVCMacroblock *currMB = video->currMB;
435    int cost;
436    int org_pitch = encvid->currInput->pitch;
437
438    /* evaluate vertical mode */
439    if (video->intraAvailB)
440    {
441        cost = cost_i16(orgY, org_pitch, encvid->pred_i16[AVC_I16_Vertical], *min_cost);
442        if (cost < *min_cost)
443        {
444            *min_cost = cost;
445            currMB->mbMode = AVC_I16;
446            currMB->mb_intra = 1;
447            currMB->i16Mode = AVC_I16_Vertical;
448        }
449    }
450
451
452    /* evaluate horizontal mode */
453    if (video->intraAvailA)
454    {
455        cost = cost_i16(orgY, org_pitch, encvid->pred_i16[AVC_I16_Horizontal], *min_cost);
456        if (cost < *min_cost)
457        {
458            *min_cost = cost;
459            currMB->mbMode = AVC_I16;
460            currMB->mb_intra = 1;
461            currMB->i16Mode = AVC_I16_Horizontal;
462        }
463    }
464
465    /* evaluate DC mode */
466    cost = cost_i16(orgY, org_pitch, encvid->pred_i16[AVC_I16_DC], *min_cost);
467    if (cost < *min_cost)
468    {
469        *min_cost = cost;
470        currMB->mbMode = AVC_I16;
471        currMB->mb_intra = 1;
472        currMB->i16Mode = AVC_I16_DC;
473    }
474
475    /* evaluate plane mode */
476    if (video->intraAvailA && video->intraAvailB && video->intraAvailD)
477    {
478        cost = cost_i16(orgY, org_pitch, encvid->pred_i16[AVC_I16_Plane], *min_cost);
479        if (cost < *min_cost)
480        {
481            *min_cost = cost;
482            currMB->mbMode = AVC_I16;
483            currMB->mb_intra = 1;
484            currMB->i16Mode = AVC_I16_Plane;
485        }
486    }
487
488    return ;
489}
490
491
/* SATD-style cost of a 16x16 prediction against the original block.
 *
 *   org       : top-left original pixel, rows org_pitch bytes apart
 *   org_pitch : row stride of org
 *   pred      : 16x16 prediction, rows 16 bytes apart
 *   min_cost  : early-exit bound
 *
 * The residue goes through a 4-point horizontal and a 4-point vertical
 * butterfly per 4x4 block; the absolute values of all non-DC coefficients
 * are accumulated.  The 16 DC coefficients are then scaled down and put
 * through a second 4x4 transform whose absolute values are added as well.
 * Returns half of the accumulated sum.  As soon as half of the running sum
 * exceeds min_cost (checked after each band of four rows and after each
 * column of the DC transform) that partial value is returned instead. */
int cost_i16(uint8 *org, int org_pitch, uint8 *pred, int min_cost)
{
    int16 res[256];     /* residue / transform workspace, 16 per row */
    int16 *p;
    int cost = 0;
    int row, col, band, n;
    int s0, s1, d0, d1; /* butterfly sums and differences */
    int t;

    /* pass 1: horizontal transform of every 4-pixel group of the residue */
    p = res;
    for (row = 0; row < 16; row++)
    {
        const uint8 *o = org + row * org_pitch;
        const uint8 *q = pred + (row << 4);

        for (col = 0; col < 16; col += 4, p += 4)
        {
            int e0 = o[col]     - q[col];
            int e1 = o[col + 1] - q[col + 1];
            int e2 = o[col + 2] - q[col + 2];
            int e3 = o[col + 3] - q[col + 3];

            s0 = e0 + e3;
            d0 = e0 - e3;
            s1 = e1 + e2;
            d1 = e1 - e2;

            p[0] = s0 + s1;
            p[1] = d1 + d0;
            p[2] = s0 - s1;
            p[3] = d0 - d1;
        }
    }

    /* pass 2: vertical transform, one band of four rows at a time */
    for (band = 0; band < 4; band++)
    {
        p = res + (band << 6);

        for (col = 0; col < 16; col++, p++)
        {
            s0 = p[0] + p[48];
            d0 = p[0] - p[48];
            s1 = p[16] + p[32];
            d1 = p[16] - p[32];

            /* the first output stays in place; for columns 0, 4, 8 and 12 it
               is the DC term of a 4x4 block and is handled in passes 3/4 */
            t = s0 + s1;
            p[0] = t;
            if (col & 0x3)
            {
                cost += (t > 0) ? t : -t;
            }

            t = s0 - s1;
            cost += (t > 0) ? t : -t;
            t = d1 + d0;
            cost += (t > 0) ? t : -t;
            t = d0 - d1;
            cost += (t > 0) ? t : -t;
        }

        if ((cost >> 1) > min_cost) /* early drop out */
        {
            return (cost >> 1);
        }
    }

    /* pass 3: horizontal transform of the DC terms, inputs scaled by 1/4 */
    for (band = 0; band < 4; band++)
    {
        p = res + (band << 6);

        s0 = (p[0] >> 2) + (p[12] >> 2);
        d0 = s0 - (p[12] >> 1);
        s1 = (p[4] >> 2) + (p[8] >> 2);
        d1 = s1 - (p[8] >> 1);

        p[0]  = s0 + s1;
        p[4]  = d1 + d0;
        p[8]  = s0 - s1;
        p[12] = d0 - d1;
    }

    /* pass 4: vertical transform of the DC terms, accumulate magnitudes */
    for (n = 0; n < 4; n++)
    {
        p = res + (n << 2);

        s0 = p[0] + p[192];
        d0 = p[0] - p[192];
        s1 = p[64] + p[128];
        d1 = p[64] - p[128];

        t = s0 + s1;
        cost += (t >= 0) ? t : -t;
        t = s0 - s1;
        cost += (t >= 0) ? t : -t;
        t = d1 + d0;
        cost += (t >= 0) ? t : -t;
        t = d0 - d1;
        cost += (t >= 0) ? t : -t;

        if ((cost >> 1) > min_cost) /* early drop out */
        {
            return (cost >> 1);
        }
    }

    return (cost >> 1);
}
623
624
625void mb_intra4x4_search(AVCEncObject *encvid, int *min_cost)
626{
627    AVCCommonObj *video = encvid->common;
628    AVCMacroblock *currMB = video->currMB;
629    AVCPictureData *currPic = video->currPic;
630    AVCFrameIO *currInput = encvid->currInput;
631    int pitch = currPic->pitch;
632    int org_pitch = currInput->pitch;
633    int offset;
634    uint8 *curL, *comp, *org4, *org8;
635    int y = video->mb_y << 4;
636    int x = video->mb_x << 4;
637
638    int b8, b4, cost4x4, blkidx;
639    int cost = 0;
640    int numcoef;
641    int dummy = 0;
642    int mb_intra = currMB->mb_intra; // save the original value
643
644    offset = y * pitch + x;
645
646    curL = currPic->Sl + offset;
647    org8 = currInput->YCbCr[0] + y * org_pitch + x;
648    video->pred_pitch = 4;
649
650    cost = (int)(6.0 * encvid->lambda_mode + 0.4999);
651    cost <<= 2;
652
653    currMB->mb_intra = 1;  // temporary set this to one to enable the IDCT
654    // operation inside dct_luma
655
656    for (b8 = 0; b8 < 4; b8++)
657    {
658        comp = curL;
659        org4 = org8;
660
661        for (b4 = 0; b4 < 4; b4++)
662        {
663            blkidx = blkIdx2blkXY[b8][b4];
664            cost4x4 = blk_intra4x4_search(encvid, blkidx, comp, org4);
665            cost += cost4x4;
666            if (cost > *min_cost)
667            {
668                currMB->mb_intra = mb_intra; // restore the value
669                return ;
670            }
671
672            /* do residue, Xfrm, Q, invQ, invXfrm, recon and save the DCT coefs.*/
673            video->pred_block = encvid->pred_i4[currMB->i4Mode[blkidx]];
674            numcoef = dct_luma(encvid, blkidx, comp, org4, &dummy);
675            currMB->nz_coeff[blkidx] = numcoef;
676            if (numcoef)
677            {
678                video->cbp4x4 |= (1 << blkidx);
679                currMB->CBP |= (1 << b8);
680            }
681
682            if (b4&1)
683            {
684                comp += ((pitch << 2) - 4);
685                org4 += ((org_pitch << 2) - 4);
686            }
687            else
688            {
689                comp += 4;
690                org4 += 4;
691            }
692        }
693
694        if (b8&1)
695        {
696            curL += ((pitch << 3) - 8);
697            org8 += ((org_pitch << 3) - 8);
698        }
699        else
700        {
701            curL += 8;
702            org8 += 8;
703        }
704    }
705
706    currMB->mb_intra = mb_intra; // restore the value
707
708    if (cost < *min_cost)
709    {
710        *min_cost = cost;
711        currMB->mbMode = AVC_I4;
712        currMB->mb_intra = 1;
713    }
714
715    return ;
716}
717
718
719/* search for i4 mode for a 4x4 block */
720int blk_intra4x4_search(AVCEncObject *encvid, int blkidx, uint8 *cur, uint8 *org)
721{
722    AVCCommonObj *video = encvid->common;
723    AVCNeighborAvailability availability;
724    AVCMacroblock *currMB = video->currMB;
725    bool top_left = FALSE;
726    int pitch = video->currPic->pitch;
727    uint8 mode_avail[AVCNumI4PredMode];
728    uint32 temp, DC;
729    uint8 *pred;
730    int org_pitch = encvid->currInput->pitch;
731    uint16 min_cost, cost;
732
733    int P_x, Q_x, R_x, P_y, Q_y, R_y, D, D0, D1;
734    int P0, Q0, R0, S0, P1, Q1, R1, P2, Q2;
735    uint8 P_A, P_B, P_C, P_D, P_E, P_F, P_G, P_H, P_I, P_J, P_K, P_L, P_X;
736    int r0, r1, r2, r3, r4, r5, r6, r7;
737    int x0, x1, x2, x3, x4, x5;
738    uint32 temp1, temp2;
739
740    int ipmode, mostProbableMode;
741    int fixedcost = 4 * encvid->lambda_mode;
742    int min_sad = 0x7FFF;
743
744    availability.left = TRUE;
745    availability.top = TRUE;
746    if (blkidx <= 3) /* top row block  (!block_y) */
747    { /* check availability up */
748        availability.top = video->intraAvailB ;
749    }
750    if (!(blkidx&0x3)) /* left column block (!block_x)*/
751    { /* check availability left */
752        availability.left = video->intraAvailA ;
753    }
754    availability.top_right = BlkTopRight[blkidx];
755
756    if (availability.top_right == 2)
757    {
758        availability.top_right = video->intraAvailB;
759    }
760    else if (availability.top_right == 3)
761    {
762        availability.top_right = video->intraAvailC;
763    }
764
765    if (availability.top == TRUE)
766    {
767        temp = *(uint32*)(cur - pitch);
768        P_A = temp & 0xFF;
769        P_B = (temp >> 8) & 0xFF;
770        P_C = (temp >> 16) & 0xFF;
771        P_D = (temp >> 24) & 0xFF;
772    }
773    else
774    {
775        P_A = P_B = P_C = P_D = 128;
776    }
777
778    if (availability.top_right == TRUE)
779    {
780        temp = *(uint32*)(cur - pitch + 4);
781        P_E = temp & 0xFF;
782        P_F = (temp >> 8) & 0xFF;
783        P_G = (temp >> 16) & 0xFF;
784        P_H = (temp >> 24) & 0xFF;
785    }
786    else
787    {
788        P_E = P_F = P_G = P_H = 128;
789    }
790
791    if (availability.left == TRUE)
792    {
793        cur--;
794        P_I = *cur;
795        P_J = *(cur += pitch);
796        P_K = *(cur += pitch);
797        P_L = *(cur + pitch);
798        cur -= (pitch << 1);
799        cur++;
800    }
801    else
802    {
803        P_I = P_J = P_K = P_L = 128;
804    }
805
806    /* check if top-left pixel is available */
807    if (((blkidx > 3) && (blkidx&0x3)) || ((blkidx > 3) && video->intraAvailA)
808            || ((blkidx&0x3) && video->intraAvailB)
809            || (video->intraAvailA && video->intraAvailD && video->intraAvailB))
810    {
811        top_left = TRUE;
812        P_X = *(cur - pitch - 1);
813    }
814    else
815    {
816        P_X = 128;
817    }
818
819    //===== INTRA PREDICTION FOR 4x4 BLOCK =====
820    /* vertical */
821    mode_avail[AVC_I4_Vertical] = 0;
822    if (availability.top)
823    {
824        mode_avail[AVC_I4_Vertical] = 1;
825        pred = encvid->pred_i4[AVC_I4_Vertical];
826
827        temp = (P_D << 24) | (P_C << 16) | (P_B << 8) | P_A ;
828        *((uint32*)pred) =  temp; /* write 4 at a time */
829        *((uint32*)(pred += 4)) =  temp;
830        *((uint32*)(pred += 4)) =  temp;
831        *((uint32*)(pred += 4)) =  temp;
832    }
833    /* horizontal */
834    mode_avail[AVC_I4_Horizontal] = 0;
835    mode_avail[AVC_I4_Horizontal_Up] = 0;
836    if (availability.left)
837    {
838        mode_avail[AVC_I4_Horizontal] = 1;
839        pred = encvid->pred_i4[AVC_I4_Horizontal];
840
841        temp = P_I | (P_I << 8);
842        temp = temp | (temp << 16);
843        *((uint32*)pred) = temp;
844        temp = P_J | (P_J << 8);
845        temp = temp | (temp << 16);
846        *((uint32*)(pred += 4)) = temp;
847        temp = P_K | (P_K << 8);
848        temp = temp | (temp << 16);
849        *((uint32*)(pred += 4)) = temp;
850        temp = P_L | (P_L << 8);
851        temp = temp | (temp << 16);
852        *((uint32*)(pred += 4)) = temp;
853
854        mode_avail[AVC_I4_Horizontal_Up] = 1;
855        pred = encvid->pred_i4[AVC_I4_Horizontal_Up];
856
857        Q0 = (P_J + P_K + 1) >> 1;
858        Q1 = (P_J + (P_K << 1) + P_L + 2) >> 2;
859        P0 = ((P_I + P_J + 1) >> 1);
860        P1 = ((P_I + (P_J << 1) + P_K + 2) >> 2);
861
862        temp = P0 | (P1 << 8);      // [P0 P1 Q0 Q1]
863        temp |= (Q0 << 16);     // [Q0 Q1 R0 DO]
864        temp |= (Q1 << 24);     // [R0 D0 D1 D1]
865        *((uint32*)pred) = temp;      // [D1 D1 D1 D1]
866
867        D0 = (P_K + 3 * P_L + 2) >> 2;
868        R0 = (P_K + P_L + 1) >> 1;
869
870        temp = Q0 | (Q1 << 8);
871        temp |= (R0 << 16);
872        temp |= (D0 << 24);
873        *((uint32*)(pred += 4)) = temp;
874
875        D1 = P_L;
876
877        temp = R0 | (D0 << 8);
878        temp |= (D1 << 16);
879        temp |= (D1 << 24);
880        *((uint32*)(pred += 4)) = temp;
881
882        temp = D1 | (D1 << 8);
883        temp |= (temp << 16);
884        *((uint32*)(pred += 4)) = temp;
885    }
886    /* DC */
887    mode_avail[AVC_I4_DC] = 1;
888    pred = encvid->pred_i4[AVC_I4_DC];
889    if (availability.left)
890    {
891        DC = P_I + P_J + P_K + P_L;
892
893        if (availability.top)
894        {
895            DC = (P_A + P_B + P_C + P_D + DC + 4) >> 3;
896        }
897        else
898        {
899            DC = (DC + 2) >> 2;
900
901        }
902    }
903    else if (availability.top)
904    {
905        DC = (P_A + P_B + P_C + P_D + 2) >> 2;
906
907    }
908    else
909    {
910        DC = 128;
911    }
912
913    temp = DC | (DC << 8);
914    temp = temp | (temp << 16);
915    *((uint32*)pred) = temp;
916    *((uint32*)(pred += 4)) = temp;
917    *((uint32*)(pred += 4)) = temp;
918    *((uint32*)(pred += 4)) = temp;
919
920    /* Down-left */
921    mode_avail[AVC_I4_Diagonal_Down_Left] = 0;
922
923    if (availability.top)
924    {
925        mode_avail[AVC_I4_Diagonal_Down_Left] = 1;
926
927        pred = encvid->pred_i4[AVC_I4_Diagonal_Down_Left];
928
929        r0 = P_A;
930        r1 = P_B;
931        r2 = P_C;
932        r3 = P_D;
933
934        r0 += (r1 << 1);
935        r0 += r2;
936        r0 += 2;
937        r0 >>= 2;
938        r1 += (r2 << 1);
939        r1 += r3;
940        r1 += 2;
941        r1 >>= 2;
942
943        if (availability.top_right)
944        {
945            r4 = P_E;
946            r5 = P_F;
947            r6 = P_G;
948            r7 = P_H;
949
950            r2 += (r3 << 1);
951            r2 += r4;
952            r2 += 2;
953            r2 >>= 2;
954            r3 += (r4 << 1);
955            r3 += r5;
956            r3 += 2;
957            r3 >>= 2;
958            r4 += (r5 << 1);
959            r4 += r6;
960            r4 += 2;
961            r4 >>= 2;
962            r5 += (r6 << 1);
963            r5 += r7;
964            r5 += 2;
965            r5 >>= 2;
966            r6 += (3 * r7);
967            r6 += 2;
968            r6 >>= 2;
969            temp = r0 | (r1 << 8);
970            temp |= (r2 << 16);
971            temp |= (r3 << 24);
972            *((uint32*)pred) = temp;
973
974            temp = (temp >> 8) | (r4 << 24);
975            *((uint32*)(pred += 4)) = temp;
976
977            temp = (temp >> 8) | (r5 << 24);
978            *((uint32*)(pred += 4)) = temp;
979
980            temp = (temp >> 8) | (r6 << 24);
981            *((uint32*)(pred += 4)) = temp;
982        }
983        else
984        {
985            r2 += (r3 * 3);
986            r2 += 2;
987            r2 >>= 2;
988            r3 = ((r3 << 2) + 2);
989            r3 >>= 2;
990
991            temp = r0 | (r1 << 8);
992            temp |= (r2 << 16);
993            temp |= (r3 << 24);
994            *((uint32*)pred) = temp;
995
996            temp = (temp >> 8) | (r3 << 24);
997            *((uint32*)(pred += 4)) = temp;
998
999            temp = (temp >> 8) | (r3 << 24);
1000            *((uint32*)(pred += 4)) = temp;
1001
1002            temp = (temp >> 8) | (r3 << 24);
1003            *((uint32*)(pred += 4)) = temp;
1004
1005        }
1006    }
1007
1008    /* Down Right */
1009    mode_avail[AVC_I4_Diagonal_Down_Right] = 0;
1010    /* Diagonal Vertical Right */
1011    mode_avail[AVC_I4_Vertical_Right] = 0;
1012    /* Horizontal Down */
1013    mode_avail[AVC_I4_Horizontal_Down] = 0;
1014
1015    if (top_left == TRUE)
1016    {
1017        /* Down Right */
1018        mode_avail[AVC_I4_Diagonal_Down_Right] = 1;
1019        pred = encvid->pred_i4[AVC_I4_Diagonal_Down_Right];
1020
1021        Q_x = (P_A + 2 * P_B + P_C + 2) >> 2;
1022        R_x = (P_B + 2 * P_C + P_D + 2) >> 2;
1023        P_x = (P_X + 2 * P_A + P_B + 2) >> 2;
1024        D   = (P_A + 2 * P_X + P_I + 2) >> 2;
1025        P_y = (P_X + 2 * P_I + P_J + 2) >> 2;
1026        Q_y = (P_I + 2 * P_J + P_K + 2) >> 2;
1027        R_y = (P_J + 2 * P_K + P_L + 2) >> 2;
1028
1029        /* we can pack these */
1030        temp =  D | (P_x << 8);   //[D   P_x Q_x R_x]
1031        //[P_y D   P_x Q_x]
1032        temp |= (Q_x << 16); //[Q_y P_y D   P_x]
1033        temp |= (R_x << 24);  //[R_y Q_y P_y D  ]
1034        *((uint32*)pred) = temp;
1035
1036        temp =  P_y | (D << 8);
1037        temp |= (P_x << 16);
1038        temp |= (Q_x << 24);
1039        *((uint32*)(pred += 4)) = temp;
1040
1041        temp =  Q_y | (P_y << 8);
1042        temp |= (D << 16);
1043        temp |= (P_x << 24);
1044        *((uint32*)(pred += 4)) = temp;
1045
1046        temp = R_y | (Q_y << 8);
1047        temp |= (P_y << 16);
1048        temp |= (D << 24);
1049        *((uint32*)(pred += 4)) = temp;
1050
1051
1052        /* Diagonal Vertical Right */
1053        mode_avail[AVC_I4_Vertical_Right] = 1;
1054        pred = encvid->pred_i4[AVC_I4_Vertical_Right];
1055
1056        Q0 = P_A + P_B + 1;
1057        R0 = P_B + P_C + 1;
1058        S0 = P_C + P_D + 1;
1059        P0 = P_X + P_A + 1;
1060        D = (P_I + 2 * P_X + P_A + 2) >> 2;
1061
1062        P1 = (P0 + Q0) >> 2;
1063        Q1 = (Q0 + R0) >> 2;
1064        R1 = (R0 + S0) >> 2;
1065
1066        P0 >>= 1;
1067        Q0 >>= 1;
1068        R0 >>= 1;
1069        S0 >>= 1;
1070
1071        P2 = (P_X + 2 * P_I + P_J + 2) >> 2;
1072        Q2 = (P_I + 2 * P_J + P_K + 2) >> 2;
1073
1074        temp =  P0 | (Q0 << 8);  //[P0 Q0 R0 S0]
1075        //[D  P1 Q1 R1]
1076        temp |= (R0 << 16); //[P2 P0 Q0 R0]
1077        temp |= (S0 << 24); //[Q2 D  P1 Q1]
1078        *((uint32*)pred) =  temp;
1079
1080        temp =  D | (P1 << 8);
1081        temp |= (Q1 << 16);
1082        temp |= (R1 << 24);
1083        *((uint32*)(pred += 4)) =  temp;
1084
1085        temp = P2 | (P0 << 8);
1086        temp |= (Q0 << 16);
1087        temp |= (R0 << 24);
1088        *((uint32*)(pred += 4)) =  temp;
1089
1090        temp = Q2 | (D << 8);
1091        temp |= (P1 << 16);
1092        temp |= (Q1 << 24);
1093        *((uint32*)(pred += 4)) =  temp;
1094
1095
1096        /* Horizontal Down */
1097        mode_avail[AVC_I4_Horizontal_Down] = 1;
1098        pred = encvid->pred_i4[AVC_I4_Horizontal_Down];
1099
1100
1101        Q2 = (P_A + 2 * P_B + P_C + 2) >> 2;
1102        P2 = (P_X + 2 * P_A + P_B + 2) >> 2;
1103        D = (P_I + 2 * P_X + P_A + 2) >> 2;
1104        P0 = P_X + P_I + 1;
1105        Q0 = P_I + P_J + 1;
1106        R0 = P_J + P_K + 1;
1107        S0 = P_K + P_L + 1;
1108
1109        P1 = (P0 + Q0) >> 2;
1110        Q1 = (Q0 + R0) >> 2;
1111        R1 = (R0 + S0) >> 2;
1112
1113        P0 >>= 1;
1114        Q0 >>= 1;
1115        R0 >>= 1;
1116        S0 >>= 1;
1117
1118
1119        /* we can pack these */
1120        temp = P0 | (D << 8);   //[P0 D  P2 Q2]
1121        //[Q0 P1 P0 D ]
1122        temp |= (P2 << 16);  //[R0 Q1 Q0 P1]
1123        temp |= (Q2 << 24); //[S0 R1 R0 Q1]
1124        *((uint32*)pred) = temp;
1125
1126        temp = Q0 | (P1 << 8);
1127        temp |= (P0 << 16);
1128        temp |= (D << 24);
1129        *((uint32*)(pred += 4)) = temp;
1130
1131        temp = R0 | (Q1 << 8);
1132        temp |= (Q0 << 16);
1133        temp |= (P1 << 24);
1134        *((uint32*)(pred += 4)) = temp;
1135
1136        temp = S0 | (R1 << 8);
1137        temp |= (R0 << 16);
1138        temp |= (Q1 << 24);
1139        *((uint32*)(pred += 4)) = temp;
1140
1141    }
1142
1143    /* vertical left */
1144    mode_avail[AVC_I4_Vertical_Left] = 0;
1145    if (availability.top)
1146    {
1147        mode_avail[AVC_I4_Vertical_Left] = 1;
1148        pred = encvid->pred_i4[AVC_I4_Vertical_Left];
1149
1150        x0 = P_A + P_B + 1;
1151        x1 = P_B + P_C + 1;
1152        x2 = P_C + P_D + 1;
1153        if (availability.top_right)
1154        {
1155            x3 = P_D + P_E + 1;
1156            x4 = P_E + P_F + 1;
1157            x5 = P_F + P_G + 1;
1158        }
1159        else
1160        {
1161            x3 = x4 = x5 = (P_D << 1) + 1;
1162        }
1163
1164        temp1 = (x0 >> 1);
1165        temp1 |= ((x1 >> 1) << 8);
1166        temp1 |= ((x2 >> 1) << 16);
1167        temp1 |= ((x3 >> 1) << 24);
1168
1169        *((uint32*)pred) = temp1;
1170
1171        temp2 = ((x0 + x1) >> 2);
1172        temp2 |= (((x1 + x2) >> 2) << 8);
1173        temp2 |= (((x2 + x3) >> 2) << 16);
1174        temp2 |= (((x3 + x4) >> 2) << 24);
1175
1176        *((uint32*)(pred += 4)) = temp2;
1177
1178        temp1 = (temp1 >> 8) | ((x4 >> 1) << 24);   /* rotate out old value */
1179        *((uint32*)(pred += 4)) = temp1;
1180
1181        temp2 = (temp2 >> 8) | (((x4 + x5) >> 2) << 24); /* rotate out old value */
1182        *((uint32*)(pred += 4)) = temp2;
1183    }
1184
1185    //===== LOOP OVER ALL 4x4 INTRA PREDICTION MODES =====
1186    // can re-order the search here instead of going in order
1187
1188    // find most probable mode
1189    encvid->mostProbableI4Mode[blkidx] = mostProbableMode = FindMostProbableI4Mode(video, blkidx);
1190
1191    min_cost = 0xFFFF;
1192
1193    for (ipmode = 0; ipmode < AVCNumI4PredMode; ipmode++)
1194    {
1195        if (mode_avail[ipmode] == TRUE)
1196        {
1197            cost  = (ipmode == mostProbableMode) ? 0 : fixedcost;
1198            pred = encvid->pred_i4[ipmode];
1199
1200            cost_i4(org, org_pitch, pred, &cost);
1201
1202            if (cost < min_cost)
1203            {
1204                currMB->i4Mode[blkidx] = (AVCIntra4x4PredMode)ipmode;
1205                min_cost   = cost;
1206                min_sad = cost - ((ipmode == mostProbableMode) ? 0 : fixedcost);
1207            }
1208        }
1209    }
1210
1211    if (blkidx == 0)
1212    {
1213        encvid->i4_sad = min_sad;
1214    }
1215    else
1216    {
1217        encvid->i4_sad += min_sad;
1218    }
1219
1220    return min_cost;
1221}
1222
1223int FindMostProbableI4Mode(AVCCommonObj *video, int blkidx)
1224{
1225    int dcOnlyPredictionFlag;
1226    AVCMacroblock *currMB = video->currMB;
1227    int intra4x4PredModeA, intra4x4PredModeB, predIntra4x4PredMode;
1228
1229
1230    dcOnlyPredictionFlag = 0;
1231    if (blkidx&0x3)
1232    {
1233        intra4x4PredModeA = currMB->i4Mode[blkidx-1]; // block to the left
1234    }
1235    else /* for blk 0, 4, 8, 12 */
1236    {
1237        if (video->intraAvailA)
1238        {
1239            if (video->mblock[video->mbAddrA].mbMode == AVC_I4)
1240            {
1241                intra4x4PredModeA = video->mblock[video->mbAddrA].i4Mode[blkidx + 3];
1242            }
1243            else
1244            {
1245                intra4x4PredModeA = AVC_I4_DC;
1246            }
1247        }
1248        else
1249        {
1250            dcOnlyPredictionFlag = 1;
1251            goto PRED_RESULT_READY;  // skip below
1252        }
1253    }
1254
1255    if (blkidx >> 2)
1256    {
1257        intra4x4PredModeB = currMB->i4Mode[blkidx-4]; // block above
1258    }
1259    else /* block 0, 1, 2, 3 */
1260    {
1261        if (video->intraAvailB)
1262        {
1263            if (video->mblock[video->mbAddrB].mbMode == AVC_I4)
1264            {
1265                intra4x4PredModeB = video->mblock[video->mbAddrB].i4Mode[blkidx+12];
1266            }
1267            else
1268            {
1269                intra4x4PredModeB = AVC_I4_DC;
1270            }
1271        }
1272        else
1273        {
1274            dcOnlyPredictionFlag = 1;
1275        }
1276    }
1277
1278PRED_RESULT_READY:
1279    if (dcOnlyPredictionFlag)
1280    {
1281        intra4x4PredModeA = intra4x4PredModeB = AVC_I4_DC;
1282    }
1283
1284    predIntra4x4PredMode = AVC_MIN(intra4x4PredModeA, intra4x4PredModeB);
1285
1286    return predIntra4x4PredMode;
1287}
1288
/*
 * cost_i4
 *
 * Adds the SATD cost of one 4x4 block to *cost.
 *
 * The residual org - pred is passed through a 4x4 add/subtract (Hadamard
 * style) butterfly, first along each row and then along each column; the
 * absolute values of the 16 results are summed, halved with rounding and
 * accumulated into *cost.
 *
 *   org        top-left sample of the original 4x4 block
 *   org_pitch  distance in bytes between two consecutive rows of org
 *   pred       predicted block, 16 bytes stored as 4 rows of 4 samples
 *   cost       in/out accumulator; the caller pre-loads any mode bias
 *
 * The butterflies are written as explicit sums and differences.  The former
 * "m0 - (m3 << 1)" form left-shifted negative residuals, which is undefined
 * behaviour; the results here are numerically the same.
 */
void cost_i4(uint8 *org, int org_pitch, uint8 *pred, uint16 *cost)
{
    int16 coef[16];
    int16 *p;
    int d0, d1, d2, d3;
    int s03, t03, s12, t12;
    int i, v;
    int satd = 0;

    /* horizontal pass: transform each row of the residual */
    p = coef;
    for (i = 0; i < 4; i++)
    {
        d0 = org[0] - pred[0];
        d1 = org[1] - pred[1];
        d2 = org[2] - pred[2];
        d3 = org[3] - pred[3];

        s03 = d0 + d3;
        t03 = d0 - d3;
        s12 = d1 + d2;
        t12 = d1 - d2;

        p[0] = (int16)(s03 + s12);
        p[1] = (int16)(t12 + t03);
        p[2] = (int16)(s03 - s12);
        p[3] = (int16)(t03 - t12);

        org += org_pitch;
        pred += 4;
        p += 4;
    }

    /* vertical pass: same butterfly down each column (row stride is 4) */
    p = coef;
    for (i = 0; i < 4; i++)
    {
        s03 = p[0] + p[12];
        t03 = p[0] - p[12];
        s12 = p[4] + p[8];
        t12 = p[4] - p[8];

        p[0]  = (int16)(s03 + s12);
        p[4]  = (int16)(t12 + t03);
        p[8]  = (int16)(s03 - s12);
        p[12] = (int16)(t03 - t12);

        p++;
    }

    /* sum of absolute transformed differences */
    for (i = 0; i < 16; i++)
    {
        v = coef[i];
        satd += (v >= 0) ? v : -v;
    }

    /* halve with rounding before accumulating */
    satd = (satd + 1) >> 1;
    *cost += satd;

    return ;
}
1362
1363void chroma_intra_search(AVCEncObject *encvid)
1364{
1365    AVCCommonObj *video = encvid->common;
1366    AVCPictureData *currPic = video->currPic;
1367
1368    int x_pos = video->mb_x << 3;
1369    int y_pos = video->mb_y << 3;
1370    int pitch = currPic->pitch >> 1;
1371    int offset = y_pos * pitch + x_pos;
1372
1373    uint8 *comp_ref_x, *comp_ref_y, *pred;
1374    int  sum_x0, sum_x1, sum_y0, sum_y1;
1375    int pred_0[2], pred_1[2], pred_2[2], pred_3[2];
1376    uint32 pred_a, pred_b, pred_c, pred_d;
1377    int i, j, component;
1378    int a_16, b, c, factor_c, topleft;
1379    int H, V, value;
1380    uint8 *comp_ref_x0, *comp_ref_x1,  *comp_ref_y0, *comp_ref_y1;
1381
1382    uint8 *curCb = currPic->Scb + offset;
1383    uint8 *curCr = currPic->Scr + offset;
1384
1385    uint8 *orgCb, *orgCr;
1386    AVCFrameIO *currInput = encvid->currInput;
1387    AVCMacroblock *currMB = video->currMB;
1388    int org_pitch;
1389    int cost, mincost;
1390
1391    /* evaluate DC mode */
1392    if (video->intraAvailB & video->intraAvailA)
1393    {
1394        comp_ref_x = curCb - pitch;
1395        comp_ref_y = curCb - 1;
1396
1397        for (i = 0; i < 2; i++)
1398        {
1399            pred_a = *((uint32*)comp_ref_x);
1400            comp_ref_x += 4;
1401            pred_b = (pred_a >> 8) & 0xFF00FF;
1402            pred_a &= 0xFF00FF;
1403            pred_a += pred_b;
1404            pred_a += (pred_a >> 16);
1405            sum_x0 = pred_a & 0xFFFF;
1406
1407            pred_a = *((uint32*)comp_ref_x);
1408            pred_b = (pred_a >> 8) & 0xFF00FF;
1409            pred_a &= 0xFF00FF;
1410            pred_a += pred_b;
1411            pred_a += (pred_a >> 16);
1412            sum_x1 = pred_a & 0xFFFF;
1413
1414            pred_1[i] = (sum_x1 + 2) >> 2;
1415
1416            sum_y0 = *comp_ref_y;
1417            sum_y0 += *(comp_ref_y += pitch);
1418            sum_y0 += *(comp_ref_y += pitch);
1419            sum_y0 += *(comp_ref_y += pitch);
1420
1421            sum_y1 = *(comp_ref_y += pitch);
1422            sum_y1 += *(comp_ref_y += pitch);
1423            sum_y1 += *(comp_ref_y += pitch);
1424            sum_y1 += *(comp_ref_y += pitch);
1425
1426            pred_2[i] = (sum_y1 + 2) >> 2;
1427
1428            pred_0[i] = (sum_y0 + sum_x0 + 4) >> 3;
1429            pred_3[i] = (sum_y1 + sum_x1 + 4) >> 3;
1430
1431            comp_ref_x = curCr - pitch;
1432            comp_ref_y = curCr - 1;
1433        }
1434    }
1435
1436    else if (video->intraAvailA)
1437    {
1438        comp_ref_y = curCb - 1;
1439        for (i = 0; i < 2; i++)
1440        {
1441            sum_y0 = *comp_ref_y;
1442            sum_y0 += *(comp_ref_y += pitch);
1443            sum_y0 += *(comp_ref_y += pitch);
1444            sum_y0 += *(comp_ref_y += pitch);
1445
1446            sum_y1 = *(comp_ref_y += pitch);
1447            sum_y1 += *(comp_ref_y += pitch);
1448            sum_y1 += *(comp_ref_y += pitch);
1449            sum_y1 += *(comp_ref_y += pitch);
1450
1451            pred_0[i] = pred_1[i] = (sum_y0 + 2) >> 2;
1452            pred_2[i] = pred_3[i] = (sum_y1 + 2) >> 2;
1453
1454            comp_ref_y = curCr - 1;
1455        }
1456    }
1457    else if (video->intraAvailB)
1458    {
1459        comp_ref_x = curCb - pitch;
1460        for (i = 0; i < 2; i++)
1461        {
1462            pred_a = *((uint32*)comp_ref_x);
1463            comp_ref_x += 4;
1464            pred_b = (pred_a >> 8) & 0xFF00FF;
1465            pred_a &= 0xFF00FF;
1466            pred_a += pred_b;
1467            pred_a += (pred_a >> 16);
1468            sum_x0 = pred_a & 0xFFFF;
1469
1470            pred_a = *((uint32*)comp_ref_x);
1471            pred_b = (pred_a >> 8) & 0xFF00FF;
1472            pred_a &= 0xFF00FF;
1473            pred_a += pred_b;
1474            pred_a += (pred_a >> 16);
1475            sum_x1 = pred_a & 0xFFFF;
1476
1477            pred_0[i] = pred_2[i] = (sum_x0 + 2) >> 2;
1478            pred_1[i] = pred_3[i] = (sum_x1 + 2) >> 2;
1479
1480            comp_ref_x = curCr - pitch;
1481        }
1482    }
1483    else
1484    {
1485        pred_0[0] = pred_0[1] = pred_1[0] = pred_1[1] =
1486                                                pred_2[0] = pred_2[1] = pred_3[0] = pred_3[1] = 128;
1487    }
1488
1489    pred = encvid->pred_ic[AVC_IC_DC];
1490
1491    pred_a = pred_0[0];
1492    pred_b = pred_1[0];
1493    pred_a |= (pred_a << 8);
1494    pred_a |= (pred_a << 16);
1495    pred_b |= (pred_b << 8);
1496    pred_b |= (pred_b << 16);
1497
1498    pred_c = pred_0[1];
1499    pred_d = pred_1[1];
1500    pred_c |= (pred_c << 8);
1501    pred_c |= (pred_c << 16);
1502    pred_d |= (pred_d << 8);
1503    pred_d |= (pred_d << 16);
1504
1505
1506    for (j = 0; j < 4; j++) /* 4 lines */
1507    {
1508        *((uint32*)pred) = pred_a;
1509        *((uint32*)(pred + 4)) = pred_b;
1510        *((uint32*)(pred + 8)) = pred_c;
1511        *((uint32*)(pred + 12)) = pred_d;
1512        pred += 16; /* move to the next line */
1513    }
1514
1515    pred_a = pred_2[0];
1516    pred_b = pred_3[0];
1517    pred_a |= (pred_a << 8);
1518    pred_a |= (pred_a << 16);
1519    pred_b |= (pred_b << 8);
1520    pred_b |= (pred_b << 16);
1521
1522    pred_c = pred_2[1];
1523    pred_d = pred_3[1];
1524    pred_c |= (pred_c << 8);
1525    pred_c |= (pred_c << 16);
1526    pred_d |= (pred_d << 8);
1527    pred_d |= (pred_d << 16);
1528
1529    for (j = 0; j < 4; j++) /* 4 lines */
1530    {
1531        *((uint32*)pred) = pred_a;
1532        *((uint32*)(pred + 4)) = pred_b;
1533        *((uint32*)(pred + 8)) = pred_c;
1534        *((uint32*)(pred + 12)) = pred_d;
1535        pred += 16; /* move to the next line */
1536    }
1537
1538    /* predict horizontal mode */
1539    if (video->intraAvailA)
1540    {
1541        comp_ref_y = curCb - 1;
1542        comp_ref_x = curCr - 1;
1543        pred = encvid->pred_ic[AVC_IC_Horizontal];
1544
1545        for (i = 4; i < 6; i++)
1546        {
1547            for (j = 0; j < 4; j++)
1548            {
1549                pred_a = *comp_ref_y;
1550                comp_ref_y += pitch;
1551                pred_a |= (pred_a << 8);
1552                pred_a |= (pred_a << 16);
1553                *((uint32*)pred) = pred_a;
1554                *((uint32*)(pred + 4)) = pred_a;
1555
1556                pred_a = *comp_ref_x;
1557                comp_ref_x += pitch;
1558                pred_a |= (pred_a << 8);
1559                pred_a |= (pred_a << 16);
1560                *((uint32*)(pred + 8)) = pred_a;
1561                *((uint32*)(pred + 12)) = pred_a;
1562
1563                pred += 16;
1564            }
1565        }
1566    }
1567
1568    /* vertical mode */
1569    if (video->intraAvailB)
1570    {
1571        comp_ref_x = curCb - pitch;
1572        comp_ref_y = curCr - pitch;
1573        pred = encvid->pred_ic[AVC_IC_Vertical];
1574
1575        pred_a = *((uint32*)comp_ref_x);
1576        pred_b = *((uint32*)(comp_ref_x + 4));
1577        pred_c = *((uint32*)comp_ref_y);
1578        pred_d = *((uint32*)(comp_ref_y + 4));
1579
1580        for (j = 0; j < 8; j++)
1581        {
1582            *((uint32*)pred) = pred_a;
1583            *((uint32*)(pred + 4)) = pred_b;
1584            *((uint32*)(pred + 8)) = pred_c;
1585            *((uint32*)(pred + 12)) = pred_d;
1586            pred += 16;
1587        }
1588    }
1589
1590    /* Intra_Chroma_Plane */
1591    if (video->intraAvailA && video->intraAvailB && video->intraAvailD)
1592    {
1593        comp_ref_x = curCb - pitch;
1594        comp_ref_y = curCb - 1;
1595        topleft = curCb[-pitch-1];
1596
1597        pred = encvid->pred_ic[AVC_IC_Plane];
1598        for (component = 0; component < 2; component++)
1599        {
1600            H = V = 0;
1601            comp_ref_x0 = comp_ref_x + 4;
1602            comp_ref_x1 = comp_ref_x + 2;
1603            comp_ref_y0 = comp_ref_y + (pitch << 2);
1604            comp_ref_y1 = comp_ref_y + (pitch << 1);
1605            for (i = 1; i < 4; i++)
1606            {
1607                H += i * (*comp_ref_x0++ - *comp_ref_x1--);
1608                V += i * (*comp_ref_y0 - *comp_ref_y1);
1609                comp_ref_y0 += pitch;
1610                comp_ref_y1 -= pitch;
1611            }
1612            H += i * (*comp_ref_x0++ - topleft);
1613            V += i * (*comp_ref_y0 - *comp_ref_y1);
1614
1615            a_16 = ((*(comp_ref_x + 7) + *(comp_ref_y + 7 * pitch)) << 4) + 16;
1616            b = (17 * H + 16) >> 5;
1617            c = (17 * V + 16) >> 5;
1618
1619            pred_a = 0;
1620            for (i = 4; i < 6; i++)
1621            {
1622                for (j = 0; j < 4; j++)
1623                {
1624                    factor_c = a_16 + c * (pred_a++ - 3);
1625
1626                    factor_c -= 3 * b;
1627
1628                    value = factor_c >> 5;
1629                    factor_c += b;
1630                    CLIP_RESULT(value)
1631                    pred_b = value;
1632                    value = factor_c >> 5;
1633                    factor_c += b;
1634                    CLIP_RESULT(value)
1635                    pred_b |= (value << 8);
1636                    value = factor_c >> 5;
1637                    factor_c += b;
1638                    CLIP_RESULT(value)
1639                    pred_b |= (value << 16);
1640                    value = factor_c >> 5;
1641                    factor_c += b;
1642                    CLIP_RESULT(value)
1643                    pred_b |= (value << 24);
1644                    *((uint32*)pred) = pred_b;
1645
1646                    value = factor_c >> 5;
1647                    factor_c += b;
1648                    CLIP_RESULT(value)
1649                    pred_b = value;
1650                    value = factor_c >> 5;
1651                    factor_c += b;
1652                    CLIP_RESULT(value)
1653                    pred_b |= (value << 8);
1654                    value = factor_c >> 5;
1655                    factor_c += b;
1656                    CLIP_RESULT(value)
1657                    pred_b |= (value << 16);
1658                    value = factor_c >> 5;
1659                    factor_c += b;
1660                    CLIP_RESULT(value)
1661                    pred_b |= (value << 24);
1662                    *((uint32*)(pred + 4)) = pred_b;
1663                    pred += 16;
1664                }
1665            }
1666
1667            pred -= 120; /* point to cr */
1668            comp_ref_x = curCr - pitch;
1669            comp_ref_y = curCr - 1;
1670            topleft = curCr[-pitch-1];
1671        }
1672    }
1673
1674    /* now evaluate it */
1675
1676    org_pitch = (currInput->pitch) >> 1;
1677    offset = x_pos + y_pos * org_pitch;
1678
1679    orgCb = currInput->YCbCr[1] + offset;
1680    orgCr = currInput->YCbCr[2] + offset;
1681
1682    mincost = 0x7fffffff;
1683    cost = SATDChroma(orgCb, orgCr, org_pitch, encvid->pred_ic[AVC_IC_DC], mincost);
1684    if (cost < mincost)
1685    {
1686        mincost = cost;
1687        currMB->intra_chroma_pred_mode = AVC_IC_DC;
1688    }
1689
1690    if (video->intraAvailA)
1691    {
1692        cost = SATDChroma(orgCb, orgCr, org_pitch, encvid->pred_ic[AVC_IC_Horizontal], mincost);
1693        if (cost < mincost)
1694        {
1695            mincost = cost;
1696            currMB->intra_chroma_pred_mode = AVC_IC_Horizontal;
1697        }
1698    }
1699
1700    if (video->intraAvailB)
1701    {
1702        cost = SATDChroma(orgCb, orgCr, org_pitch, encvid->pred_ic[AVC_IC_Vertical], mincost);
1703        if (cost < mincost)
1704        {
1705            mincost = cost;
1706            currMB->intra_chroma_pred_mode = AVC_IC_Vertical;
1707        }
1708    }
1709
1710    if (video->intraAvailA && video->intraAvailB && video->intraAvailD)
1711    {
1712        cost = SATDChroma(orgCb, orgCr, org_pitch, encvid->pred_ic[AVC_IC_Plane], mincost);
1713        if (cost < mincost)
1714        {
1715            mincost = cost;
1716            currMB->intra_chroma_pred_mode = AVC_IC_Plane;
1717        }
1718    }
1719
1720
1721    return ;
1722}
1723
1724
/*
 * SATDChroma
 *
 * Sum of absolute transformed differences between the original 8x8 Cb/Cr
 * blocks and one chroma prediction.
 *
 *   orgCb, orgCr  top-left samples of the original Cb and Cr 8x8 blocks
 *   org_pitch     distance in bytes between two rows of orgCb / orgCr
 *   pred          prediction, 8 lines of 16 bytes: 8 Cb samples followed by
 *                 8 Cr samples per line
 *   min_cost      best cost found so far, used for early termination
 *
 * The residual is transformed with a 4x4 add/subtract butterfly (rows first,
 * then columns) on each of the eight 4x4 sub-blocks.  The absolute values are
 * accumulated eight coefficients at a time; as soon as the running total
 * exceeds min_cost that partial total is returned.  Otherwise the full sum is
 * returned.
 *
 * The butterflies are written as explicit sums and differences.  The former
 * "m0 - (m3 << 1)" form left-shifted negative residuals, which is undefined
 * behaviour; the results here are numerically the same.
 */
int SATDChroma(uint8 *orgCb, uint8 *orgCr, int org_pitch, uint8 *pred, int min_cost)
{
    int16 res[128];     /* transformed residual, 8 lines of 16 coefficients */
    int16 *p;
    uint8 *src;
    int d0, d1, d2, d3;
    int s03, t03, s12, t12;
    int row, blk, half, col, i, k;
    int cost, v;

    /* horizontal pass: four groups of 4 samples per line (2 Cb, then 2 Cr) */
    p = res;
    for (row = 0; row < 8; row++)
    {
        for (blk = 0; blk < 4; blk++)
        {
            src = (blk < 2) ? (orgCb + (blk << 2)) : (orgCr + ((blk - 2) << 2));

            d0 = src[0] - pred[0];
            d1 = src[1] - pred[1];
            d2 = src[2] - pred[2];
            d3 = src[3] - pred[3];

            s03 = d0 + d3;
            t03 = d0 - d3;
            s12 = d1 + d2;
            t12 = d1 - d2;

            p[0] = (int16)(s03 + s12);
            p[1] = (int16)(t12 + t03);
            p[2] = (int16)(s03 - s12);
            p[3] = (int16)(t03 - t12);

            p += 4;
            pred += 4;
        }
        orgCb += org_pitch;
        orgCr += org_pitch;
    }

    /* vertical pass: lines 0..3 and lines 4..7 are transformed separately,
       one column at a time (line stride is 16 coefficients) */
    for (half = 0; half < 2; half++)
    {
        p = res + (half << 6);
        for (col = 0; col < 16; col++)
        {
            s03 = p[0] + p[48];
            t03 = p[0] - p[48];
            s12 = p[16] + p[32];
            t12 = p[16] - p[32];

            p[0]  = (int16)(s03 + s12);
            p[16] = (int16)(t12 + t03);
            p[32] = (int16)(s03 - s12);
            p[48] = (int16)(t03 - t12);

            p++;
        }
    }

    /* now sum of absolute value, checked against min_cost every 8 terms */
    p = res;
    cost = 0;
    for (k = 0; k < 128; k += 8)
    {
        for (i = 0; i < 8; i++)
        {
            v = p[i];
            cost += (v >= 0) ? v : -v;
        }
        p += 8;

        if (cost > min_cost) /* early drop out */
        {
            return cost;
        }
    }

    return cost;
}
1840
1841
1842
1843///////////////////////////////// old code, unused
1844/* find the best intra mode based on original (unencoded) frame */
1845/* output is
1846    currMB->mb_intra, currMB->mbMode,
1847    currMB->i16Mode  (if currMB->mbMode == AVC_I16)
1848    currMB->i4Mode[..] (if currMB->mbMode == AVC_I4) */
1849
1850#ifdef FIXED_INTRAPRED_MODE
1851void MBIntraSearch(AVCEncObject *encvid, AVCMacroblock *currMB, int mbNum)
1852{
1853    (void)(mbNum);
1854
1855    AVCCommonObj *video = encvid->common;
1856    int indx, block_x, block_y;
1857
1858    video->intraAvailA = video->intraAvailB = video->intraAvailC = video->intraAvailD = 0;
1859
1860    if (!video->currPicParams->constrained_intra_pred_flag)
1861    {
1862        video->intraAvailA = video->mbAvailA;
1863        video->intraAvailB = video->mbAvailB;
1864        video->intraAvailC = video->mbAvailC;
1865        video->intraAvailD = video->mbAvailD;
1866    }
1867    else
1868    {
1869        if (video->mbAvailA)
1870        {
1871            video->intraAvailA = video->mblock[video->mbAddrA].mb_intra;
1872        }
1873        if (video->mbAvailB)
1874        {
1875            video->intraAvailB = video->mblock[video->mbAddrB].mb_intra ;
1876        }
1877        if (video->mbAvailC)
1878        {
1879            video->intraAvailC = video->mblock[video->mbAddrC].mb_intra;
1880        }
1881        if (video->mbAvailD)
1882        {
1883            video->intraAvailD = video->mblock[video->mbAddrD].mb_intra;
1884        }
1885    }
1886
1887    currMB->mb_intra = TRUE;
1888    currMB->mbMode = FIXED_INTRAPRED_MODE;
1889
1890    if (currMB->mbMode == AVC_I16)
1891    {
1892        currMB->i16Mode = FIXED_I16_MODE;
1893
1894        if (FIXED_I16_MODE == AVC_I16_Vertical && !video->intraAvailB)
1895        {
1896            currMB->i16Mode = AVC_I16_DC;
1897        }
1898
1899        if (FIXED_I16_MODE == AVC_I16_Horizontal && !video->intraAvailA)
1900        {
1901            currMB->i16Mode = AVC_I16_DC;
1902        }
1903
1904        if (FIXED_I16_MODE == AVC_I16_Plane && !(video->intraAvailA && video->intraAvailB && video->intraAvailD))
1905        {
1906            currMB->i16Mode = AVC_I16_DC;
1907        }
1908    }
1909    else //if(currMB->mbMode == AVC_I4)
1910    {
1911        for (indx = 0; indx < 16; indx++)
1912        {
1913            block_x = blkIdx2blkX[indx];
1914            block_y = blkIdx2blkY[indx];
1915
1916            currMB->i4Mode[(block_y<<2)+block_x] = FIXED_I4_MODE;
1917
1918            if (FIXED_I4_MODE == AVC_I4_Vertical && !(block_y > 0 || video->intraAvailB))
1919            {
1920                currMB->i4Mode[(block_y<<2)+block_x] = AVC_I4_DC;
1921            }
1922
1923            if (FIXED_I4_MODE == AVC_I4_Horizontal && !(block_x || video->intraAvailA))
1924            {
1925                currMB->i4Mode[(block_y<<2)+block_x] = AVC_I4_DC;
1926            }
1927
1928            if (FIXED_I4_MODE == AVC_I4_Diagonal_Down_Left &&
1929                    (block_y == 0 && !video->intraAvailB))
1930            {
1931                currMB->i4Mode[(block_y<<2)+block_x] = AVC_I4_DC;
1932            }
1933
1934            if (FIXED_I4_MODE == AVC_I4_Diagonal_Down_Right &&
1935                    !((block_y && block_x)
1936                      || (block_y && video->intraAvailA)
1937                      || (block_x && video->intraAvailB)
1938                      || (video->intraAvailA && video->intraAvailD && video->intraAvailB)))
1939            {
1940                currMB->i4Mode[(block_y<<2)+block_x] = AVC_I4_DC;
1941            }
1942
1943            if (FIXED_I4_MODE == AVC_I4_Vertical_Right &&
1944                    !((block_y && block_x)
1945                      || (block_y && video->intraAvailA)
1946                      || (block_x && video->intraAvailB)
1947                      || (video->intraAvailA && video->intraAvailD && video->intraAvailB)))
1948            {
1949                currMB->i4Mode[(block_y<<2)+block_x] = AVC_I4_DC;
1950            }
1951
1952            if (FIXED_I4_MODE == AVC_I4_Horizontal_Down &&
1953                    !((block_y && block_x)
1954                      || (block_y && video->intraAvailA)
1955                      || (block_x && video->intraAvailB)
1956                      || (video->intraAvailA && video->intraAvailD && video->intraAvailB)))
1957            {
1958                currMB->i4Mode[(block_y<<2)+block_x] = AVC_I4_DC;
1959            }
1960
1961            if (FIXED_I4_MODE == AVC_I4_Vertical_Left &&
1962                    (block_y == 0 && !video->intraAvailB))
1963            {
1964                currMB->i4Mode[(block_y<<2)+block_x] = AVC_I4_DC;
1965            }
1966
1967            if (FIXED_I4_MODE == AVC_I4_Horizontal_Up && !(block_x || video->intraAvailA))
1968            {
1969                currMB->i4Mode[(block_y<<2)+block_x] = AVC_I4_DC;
1970            }
1971        }
1972    }
1973
1974    currMB->intra_chroma_pred_mode = FIXED_INTRA_CHROMA_MODE;
1975
1976    if (FIXED_INTRA_CHROMA_MODE == AVC_IC_Horizontal && !(video->intraAvailA))
1977    {
1978        currMB->intra_chroma_pred_mode = AVC_IC_DC;
1979    }
1980
1981    if (FIXED_INTRA_CHROMA_MODE == AVC_IC_Vertical && !(video->intraAvailB))
1982    {
1983        currMB->intra_chroma_pred_mode = AVC_IC_DC;
1984    }
1985
1986    if (FIXED_INTRA_CHROMA_MODE == AVC_IC_Plane && !(video->intraAvailA && video->intraAvailB && video->intraAvailD))
1987    {
1988        currMB->intra_chroma_pred_mode = AVC_IC_DC;
1989    }
1990
1991    /* also reset the motion vectors */
1992    /* set MV and Ref_Idx codes of Intra blocks in P-slices */
1993    memset(currMB->mvL0, 0, sizeof(int32)*16);
1994    currMB->ref_idx_L0[0] = -1;
1995    currMB->ref_idx_L0[1] = -1;
1996    currMB->ref_idx_L0[2] = -1;
1997    currMB->ref_idx_L0[3] = -1;
1998
1999    // output from this function, currMB->mbMode should be set to either
2000    // AVC_I4, AVC_I16, or else in AVCMBMode enum, mbType, mb_intra, intra_chroma_pred_mode */
2001    return ;
2002}
2003#else // faster combined prediction+SAD calculation
2004void MBIntraSearch(AVCEncObject *encvid, AVCMacroblock *currMB, int mbNum)
2005{
2006    AVCCommonObj *video = encvid->common;
2007    AVCFrameIO *currInput = encvid->currInput;
2008    uint8 *curL, *curCb, *curCr;
2009    uint8 *comp, *pred_block;
2010    int block_x, block_y, offset;
2011    uint sad, sad4, sadI4, sadI16;
2012    int component, SubBlock_indx, temp;
2013    int pitch = video->currPic->pitch;
2014
2015    /* calculate the cost of each intra prediction mode  and compare to the
2016    inter mode */
2017    /* full search for all intra prediction */
2018    offset = (video->mb_y << 4) * pitch + (video->mb_x << 4);
2019    curL = currInput->YCbCr[0] + offset;
2020    pred_block = video->pred_block + 84;
2021
2022    /* Assuming that InitNeighborAvailability has been called prior to this function */
2023    video->intraAvailA = video->intraAvailB = video->intraAvailC = video->intraAvailD = 0;
2024
2025    if (!video->currPicParams->constrained_intra_pred_flag)
2026    {
2027        video->intraAvailA = video->mbAvailA;
2028        video->intraAvailB = video->mbAvailB;
2029        video->intraAvailC = video->mbAvailC;
2030        video->intraAvailD = video->mbAvailD;
2031    }
2032    else
2033    {
2034        if (video->mbAvailA)
2035        {
2036            video->intraAvailA = video->mblock[video->mbAddrA].mb_intra;
2037        }
2038        if (video->mbAvailB)
2039        {
2040            video->intraAvailB = video->mblock[video->mbAddrB].mb_intra ;
2041        }
2042        if (video->mbAvailC)
2043        {
2044            video->intraAvailC = video->mblock[video->mbAddrC].mb_intra;
2045        }
2046        if (video->mbAvailD)
2047        {
2048            video->intraAvailD = video->mblock[video->mbAddrD].mb_intra;
2049        }
2050    }
2051
2052    /* currently we're doing exhaustive search. Smart search will be used later */
2053
2054    /* I16 modes */
2055    curL = currInput->YCbCr[0] + offset;
2056    video->pintra_pred_top = curL - pitch;
2057    video->pintra_pred_left = curL - 1;
2058    if (video->mb_y)
2059    {
2060        video->intra_pred_topleft = *(curL - pitch - 1);
2061    }
2062
2063    /* Intra_16x16_Vertical */
2064    sadI16 = 65536;
2065    /* check availability of top */
2066    if (video->intraAvailB)
2067    {
2068        sad = SAD_I16_Vert(video, curL, sadI16);
2069
2070        if (sad < sadI16)
2071        {
2072            sadI16 = sad;
2073            currMB->i16Mode = AVC_I16_Vertical;
2074        }
2075    }
2076    /* Intra_16x16_Horizontal */
2077    /* check availability of left */
2078    if (video->intraAvailA)
2079    {
2080        sad = SAD_I16_HorzDC(video, curL, AVC_I16_Horizontal, sadI16);
2081
2082        if (sad < sadI16)
2083        {
2084            sadI16 = sad;
2085            currMB->i16Mode = AVC_I16_Horizontal;
2086        }
2087    }
2088
2089    /* Intra_16x16_DC, default mode */
2090    sad = SAD_I16_HorzDC(video, curL, AVC_I16_DC, sadI16);
2091    if (sad < sadI16)
2092    {
2093        sadI16 = sad;
2094        currMB->i16Mode = AVC_I16_DC;
2095    }
2096
2097    /* Intra_16x16_Plane */
2098    if (video->intraAvailA && video->intraAvailB && video->intraAvailD)
2099    {
2100        sad = SAD_I16_Plane(video, curL, sadI16);
2101
2102        if (sad < sadI16)
2103        {
2104            sadI16 = sad;
2105            currMB->i16Mode = AVC_I16_Plane;
2106        }
2107    }
2108
2109    sadI16 >>= 1;  /* before comparison */
2110
2111    /* selection between intra4, intra16 or inter mode */
2112    if (sadI16 < encvid->min_cost)
2113    {
2114        currMB->mb_intra = TRUE;
2115        currMB->mbMode = AVC_I16;
2116        encvid->min_cost = sadI16;
2117    }
2118
2119    if (currMB->mb_intra) /* only do the chrominance search when intra is decided */
2120    {
2121        /* Note that we might be able to guess the type of prediction from
2122        the luma prediction type */
2123
2124        /* now search for the best chroma intra prediction */
2125        offset = (offset >> 2) + (video->mb_x << 2);
2126        curCb = currInput->YCbCr[1] + offset;
2127        curCr = currInput->YCbCr[2] + offset;
2128
2129        pitch >>= 1;
2130        video->pintra_pred_top_cb = curCb - pitch;
2131        video->pintra_pred_left_cb = curCb - 1;
2132        video->pintra_pred_top_cr = curCr - pitch;
2133        video->pintra_pred_left_cr = curCr - 1;
2134
2135        if (video->mb_y)
2136        {
2137            video->intra_pred_topleft_cb = *(curCb - pitch - 1);
2138            video->intra_pred_topleft_cr = *(curCr - pitch - 1);
2139        }
2140
2141        /* Intra_Chroma_DC */
2142        sad4 = SAD_Chroma_DC(video, curCb, curCr, 65536);
2143        currMB->intra_chroma_pred_mode = AVC_IC_DC;
2144
2145        /* Intra_Chroma_Horizontal */
2146        if (video->intraAvailA)
2147        {
2148            /* check availability of left */
2149            sad = SAD_Chroma_Horz(video, curCb, curCr, sad4);
2150            if (sad < sad4)
2151            {
2152                sad4 = sad;
2153                currMB->intra_chroma_pred_mode = AVC_IC_Horizontal;
2154            }
2155        }
2156
2157        /* Intra_Chroma_Vertical */
2158        if (video->intraAvailB)
2159        {
2160            /* check availability of top */
2161            sad = SAD_Chroma_Vert(video, curCb, curCr, sad4);
2162
2163            if (sad < sad4)
2164            {
2165                sad4 = sad;
2166                currMB->intra_chroma_pred_mode = AVC_IC_Vertical;
2167            }
2168        }
2169
2170        /* Intra_Chroma_Plane */
2171        if (video->intraAvailA && video->intraAvailB && video->intraAvailD)
2172        {
2173            /* check availability of top and left */
2174            Intra_Chroma_Plane(video, pitch);
2175
2176            sad = SADChroma(pred_block + 452, curCb, curCr, pitch);
2177
2178            if (sad < sad4)
2179            {
2180                sad4 = sad;
2181                currMB->intra_chroma_pred_mode = AVC_IC_Plane;
2182            }
2183        }
2184
2185        /* also reset the motion vectors */
2186        /* set MV and Ref_Idx codes of Intra blocks in P-slices */
2187        memset(currMB->mvL0, 0, sizeof(int32)*16);
2188        memset(currMB->ref_idx_L0, -1, sizeof(int16)*4);
2189
2190    }
2191
2192    // output from this function, currMB->mbMode should be set to either
2193    // AVC_I4, AVC_I16, or else in AVCMBMode enum, mbType, mb_intra, intra_chroma_pred_mode */
2194
2195    return ;
2196}
2197#endif
2198
2199
2200