1/* ------------------------------------------------------------------
2 * Copyright (C) 1998-2009 PacketVideo
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
13 * express or implied.
14 * See the License for the specific language governing permissions
15 * and limitations under the License.
16 * -------------------------------------------------------------------
17 */
18#include "avcdec_lib.h"
19
/*
 * Saturation helpers: clamp a signed intermediate to the 0..255 range.
 *
 * Both rely on the same trick: a value outside 0..255 fails the unsigned
 * comparison, and (~(v >> 31)) & 0xFF then yields 0 for negative v and 0xFF
 * for v > 255 (this assumes an arithmetic right shift on a 32-bit int).
 *
 * CLIP_COMP    - takes no argument; it clamps the local variable `temp` and
 *                stores the result through the local pointer `comp`, which it
 *                post-increments.  Both names must be in scope at the use site.
 * CLIP_RESULT  - clamps the lvalue `x` in place.  It deliberately expands to a
 *                complete `if { }` statement, so it can be used with or without
 *                a trailing semicolon.
 */
#define CLIP_COMP  *comp++ = (uint8)((((uint)(temp)) > 0xFF) ? (0xFF & (~((temp) >> 31))) : (temp))
#define CLIP_RESULT(x)      if (((uint)(x)) > 0xFF) { \
                 (x) = 0xFF & (~((x) >> 31)); }
23
24
25/* We should combine the Intra4x4 functions with residual decoding and compensation  */
26AVCStatus IntraMBPrediction(AVCCommonObj *video)
27{
28    int component, SubBlock_indx, temp;
29    AVCStatus status;
30    AVCMacroblock *currMB = video->currMB;
31    AVCPictureData *currPic = video->currPic;
32    uint8 *curL, *curCb, *curCr;
33    uint8 *comp;
34    int block_x, block_y, offset;
35    int16 *dataBlock = video->block;
36    uint8 *predCb, *predCr;
37#ifdef USE_PRED_BLOCK
38    uint8 *pred;
39#endif
40    int pitch = currPic->pitch;
41    uint32 cbp4x4 = video->cbp4x4;
42
43    offset = (video->mb_y << 4) * pitch + (video->mb_x << 4);
44    curL = currPic->Sl + offset;
45
46#ifdef USE_PRED_BLOCK
47    video->pred_block = video->pred + 84;  /* point to separate prediction memory */
48    pred = video->pred_block;
49    video->pred_pitch = 20;
50#else
51    video->pred_block = curL;   /* point directly to the frame buffer */
52    video->pred_pitch = pitch;
53#endif
54
55    if (currMB->mbMode == AVC_I4)
56    {
57        /* luminance first */
58        block_x = block_y = 0;
59        for (component = 0; component < 4; component++)
60        {
61            block_x = ((component & 1) << 1);
62            block_y = ((component >> 1) << 1);
63            comp = curL;// + (block_x<<2) + (block_y<<2)*currPic->pitch;
64
65            for (SubBlock_indx = 0; SubBlock_indx < 4; SubBlock_indx++)
66            {
67                status = Intra_4x4(video, block_x, block_y, comp);
68                if (status != AVC_SUCCESS)
69                {
70                    return status;
71                }
72                /* transform following the 4x4 prediction, can't be SIMD
73                with other blocks. */
74#ifdef USE_PRED_BLOCK
75                if (cbp4x4&(1 << ((block_y << 2) + block_x)))
76                {
77                    itrans(dataBlock, pred, pred, 20);
78                }
79#else
80                if (cbp4x4&(1 << ((block_y << 2) + block_x)))
81                {
82                    itrans(dataBlock, comp, comp, pitch);
83                }
84#endif
85                temp = SubBlock_indx & 1;
86                if (temp)
87                {
88                    block_y++;
89                    block_x--;
90                    dataBlock += 60;
91#ifdef USE_PRED_BLOCK
92                    pred += 76;
93#else
94                    comp += ((pitch << 2) - 4);
95#endif
96                }
97                else
98                {
99                    block_x++;
100                    dataBlock += 4;
101#ifdef USE_PRED_BLOCK
102                    pred += 4;
103#else
104                    comp += 4;
105#endif
106                }
107            }
108            if (component&1)
109            {
110#ifdef USE_PRED_BLOCK
111                pred -= 8;
112#else
113                curL += (pitch << 3) - 8;
114#endif
115                dataBlock -= 8;
116            }
117            else
118            {
119#ifdef USE_PRED_BLOCK
120                pred -= 152;
121#else
122                curL += 8;
123#endif
124                dataBlock -= 120;
125            }
126        }
127        cbp4x4 >>= 16;
128    }
129    else   /* AVC_I16 */
130    {
131#ifdef MB_BASED_DEBLOCK
132        video->pintra_pred_top = video->intra_pred_top + (video->mb_x << 4);
133        video->pintra_pred_left = video->intra_pred_left + 1;
134        video->intra_pred_topleft = video->intra_pred_left[0];
135        pitch = 1;
136#else
137        video->pintra_pred_top = curL - pitch;
138        video->pintra_pred_left = curL - 1;
139        if (video->mb_y)
140        {
141            video->intra_pred_topleft = *(curL - pitch - 1);
142        }
143#endif
144        switch (currMB->i16Mode)
145        {
146            case AVC_I16_Vertical:      /* Intra_16x16_Vertical */
147                /* check availability of top */
148                if (video->intraAvailB)
149                {
150                    Intra_16x16_Vertical(video);
151                }
152                else
153                {
154                    return AVC_FAIL;
155                }
156                break;
157            case AVC_I16_Horizontal:        /* Intra_16x16_Horizontal */
158                /* check availability of left */
159                if (video->intraAvailA)
160                {
161                    Intra_16x16_Horizontal(video, pitch);
162                }
163                else
164                {
165                    return AVC_FAIL;
166                }
167                break;
168            case AVC_I16_DC:        /* Intra_16x16_DC */
169                Intra_16x16_DC(video, pitch);
170                break;
171            case AVC_I16_Plane:     /* Intra_16x16_Plane */
172                if (video->intraAvailA && video->intraAvailB && video->intraAvailD)
173                {
174                    Intra_16x16_Plane(video, pitch);
175                }
176                else
177                {
178                    return AVC_FAIL;
179                }
180                break;
181            default:
182                break;
183        }
184
185        pitch = currPic->pitch;
186
187        /* transform */
188        /* can go in raster scan order now */
189        /* can be done in SIMD,  */
190        for (block_y = 4; block_y > 0; block_y--)
191        {
192            for (block_x = 4; block_x > 0; block_x--)
193            {
194#ifdef USE_PRED_BLOCK
195                if (cbp4x4&1)
196                {
197                    itrans(dataBlock, pred, pred, 20);
198                }
199#else
200                if (cbp4x4&1)
201                {
202                    itrans(dataBlock, curL, curL, pitch);
203                }
204#endif
205                cbp4x4 >>= 1;
206                dataBlock += 4;
207#ifdef USE_PRED_BLOCK
208                pred += 4;
209#else
210                curL += 4;
211#endif
212            }
213            dataBlock += 48;
214#ifdef USE_PRED_BLOCK
215            pred += 64;
216#else
217            curL += ((pitch << 2) - 16);
218#endif
219        }
220    }
221
222    offset = (offset >> 2) + (video->mb_x << 2); //((video->mb_y << 3)* pitch + (video->mb_x << 3));
223    curCb = currPic->Scb + offset;
224    curCr = currPic->Scr + offset;
225
226#ifdef MB_BASED_DEBLOCK
227    video->pintra_pred_top_cb = video->intra_pred_top_cb + (video->mb_x << 3);
228    video->pintra_pred_left_cb = video->intra_pred_left_cb + 1;
229    video->intra_pred_topleft_cb = video->intra_pred_left_cb[0];
230    video->pintra_pred_top_cr = video->intra_pred_top_cr + (video->mb_x << 3);
231    video->pintra_pred_left_cr = video->intra_pred_left_cr + 1;
232    video->intra_pred_topleft_cr = video->intra_pred_left_cr[0];
233    pitch  = 1;
234#else
235    pitch >>= 1;
236    video->pintra_pred_top_cb = curCb - pitch;
237    video->pintra_pred_left_cb = curCb - 1;
238    video->pintra_pred_top_cr = curCr - pitch;
239    video->pintra_pred_left_cr = curCr - 1;
240
241    if (video->mb_y)
242    {
243        video->intra_pred_topleft_cb = *(curCb - pitch - 1);
244        video->intra_pred_topleft_cr = *(curCr - pitch - 1);
245    }
246#endif
247
248#ifdef USE_PRED_BLOCK
249    predCb = video->pred + 452;
250    predCr = predCb + 144;
251    video->pred_pitch = 12;
252#else
253    predCb = curCb;
254    predCr = curCr;
255    video->pred_pitch = currPic->pitch >> 1;
256#endif
257    /* chrominance */
258    switch (currMB->intra_chroma_pred_mode)
259    {
260        case AVC_IC_DC:     /* Intra_Chroma_DC */
261            Intra_Chroma_DC(video, pitch, predCb, predCr);
262            break;
263        case AVC_IC_Horizontal:     /* Intra_Chroma_Horizontal */
264            if (video->intraAvailA)
265            {
266                /* check availability of left */
267                Intra_Chroma_Horizontal(video, pitch, predCb, predCr);
268            }
269            else
270            {
271                return AVC_FAIL;
272            }
273            break;
274        case AVC_IC_Vertical:       /* Intra_Chroma_Vertical */
275            if (video->intraAvailB)
276            {
277                /* check availability of top */
278                Intra_Chroma_Vertical(video, predCb, predCr);
279            }
280            else
281            {
282                return AVC_FAIL;
283            }
284            break;
285        case AVC_IC_Plane:      /* Intra_Chroma_Plane */
286            if (video->intraAvailA && video->intraAvailB && video->intraAvailD)
287            {
288                /* check availability of top and left */
289                Intra_Chroma_Plane(video, pitch, predCb, predCr);
290            }
291            else
292            {
293                return AVC_FAIL;
294            }
295            break;
296        default:
297            break;
298    }
299
300    /* transform, done in raster scan manner */
301    pitch = currPic->pitch >> 1;
302
303    for (block_y = 2; block_y > 0; block_y--)
304    {
305        for (block_x = 2; block_x > 0; block_x--)
306        {
307#ifdef USE_PRED_BLOCK
308            if (cbp4x4&1)
309            {
310                ictrans(dataBlock, predCb, predCb, 12);
311            }
312#else
313            if (cbp4x4&1)
314            {
315                ictrans(dataBlock, curCb, curCb, pitch);
316            }
317#endif
318            cbp4x4 >>= 1;
319            dataBlock += 4;
320#ifdef USE_PRED_BLOCK
321            predCb += 4;
322#else
323            curCb += 4;
324#endif
325        }
326        for (block_x = 2; block_x > 0; block_x--)
327        {
328#ifdef USE_PRED_BLOCK
329            if (cbp4x4&1)
330            {
331                ictrans(dataBlock, predCr, predCr, 12);
332            }
333#else
334            if (cbp4x4&1)
335            {
336                ictrans(dataBlock, curCr, curCr, pitch);
337            }
338#endif
339            cbp4x4 >>= 1;
340            dataBlock += 4;
341#ifdef USE_PRED_BLOCK
342            predCr += 4;
343#else
344            curCr += 4;
345#endif
346        }
347        dataBlock += 48;
348#ifdef USE_PRED_BLOCK
349        predCb += 40;
350        predCr += 40;
351#else
352        curCb += ((pitch << 2) - 8);
353        curCr += ((pitch << 2) - 8);
354#endif
355    }
356
357#ifdef MB_BASED_DEBLOCK
358    SaveNeighborForIntraPred(video, offset);
359#endif
360    return AVC_SUCCESS;
361}
362
363#ifdef MB_BASED_DEBLOCK
364void SaveNeighborForIntraPred(AVCCommonObj *video, int offset)
365{
366    AVCPictureData *currPic = video->currPic;
367    int pitch;
368    uint8 *pred, *predCb, *predCr;
369    uint8 *tmp_ptr, tmp_byte;
370    uint32 tmp_word;
371    int mb_x = video->mb_x;
372
373    /* save the value for intra prediction  */
374#ifdef USE_PRED_BLOCK
375    pitch = 20;
376    pred = video->pred + 384; /* bottom line for Y */
377    predCb = pred + 152;    /* bottom line for Cb */
378    predCr = predCb + 144;  /* bottom line for Cr */
379#else
380    pitch = currPic->pitch;
381    tmp_word = offset + (pitch << 2) - (pitch >> 1);
382    predCb = currPic->Scb + tmp_word;/* bottom line for Cb */
383    predCr = currPic->Scr + tmp_word;/* bottom line for Cr */
384
385    offset = (offset << 2) - (mb_x << 4);
386    pred = currPic->Sl + offset + (pitch << 4) - pitch;/* bottom line for Y */
387
388#endif
389
390    video->intra_pred_topleft = video->intra_pred_top[(mb_x<<4)+15];
391    video->intra_pred_topleft_cb = video->intra_pred_top_cb[(mb_x<<3)+7];
392    video->intra_pred_topleft_cr = video->intra_pred_top_cr[(mb_x<<3)+7];
393
394    /* then copy to video->intra_pred_top, intra_pred_top_cb, intra_pred_top_cr */
395    /*memcpy(video->intra_pred_top + (mb_x<<4), pred, 16);
396    memcpy(video->intra_pred_top_cb + (mb_x<<3), predCb, 8);
397    memcpy(video->intra_pred_top_cr + (mb_x<<3), predCr, 8);*/
398    tmp_ptr = video->intra_pred_top + (mb_x << 4);
399    *((uint32*)tmp_ptr) = *((uint32*)pred);
400    *((uint32*)(tmp_ptr + 4)) = *((uint32*)(pred + 4));
401    *((uint32*)(tmp_ptr + 8)) = *((uint32*)(pred + 8));
402    *((uint32*)(tmp_ptr + 12)) = *((uint32*)(pred + 12));
403    tmp_ptr = video->intra_pred_top_cb + (mb_x << 3);
404    *((uint32*)tmp_ptr) = *((uint32*)predCb);
405    *((uint32*)(tmp_ptr + 4)) = *((uint32*)(predCb + 4));
406    tmp_ptr = video->intra_pred_top_cr + (mb_x << 3);
407    *((uint32*)tmp_ptr) = *((uint32*)predCr);
408    *((uint32*)(tmp_ptr + 4)) = *((uint32*)(predCr + 4));
409
410
411    /* now save last column */
412#ifdef USE_PRED_BLOCK
413    pred = video->pred + 99;    /* last column*/
414#else
415    pred -= ((pitch << 4) - pitch - 15);    /* last column */
416#endif
417    tmp_ptr = video->intra_pred_left;
418    tmp_word = video->intra_pred_topleft;
419    tmp_byte = *(pred);
420    tmp_word |= (tmp_byte << 8);
421    tmp_byte = *(pred += pitch);
422    tmp_word |= (tmp_byte << 16);
423    tmp_byte = *(pred += pitch);
424    tmp_word |= (tmp_byte << 24);
425    *((uint32*)tmp_ptr) = tmp_word;
426    tmp_word = *(pred += pitch);
427    tmp_byte = *(pred += pitch);
428    tmp_word |= (tmp_byte << 8);
429    tmp_byte = *(pred += pitch);
430    tmp_word |= (tmp_byte << 16);
431    tmp_byte = *(pred += pitch);
432    tmp_word |= (tmp_byte << 24);
433    *((uint32*)(tmp_ptr += 4)) = tmp_word;
434    tmp_word = *(pred += pitch);
435    tmp_byte = *(pred += pitch);
436    tmp_word |= (tmp_byte << 8);
437    tmp_byte = *(pred += pitch);
438    tmp_word |= (tmp_byte << 16);
439    tmp_byte = *(pred += pitch);
440    tmp_word |= (tmp_byte << 24);
441    *((uint32*)(tmp_ptr += 4)) = tmp_word;
442    tmp_word = *(pred += pitch);
443    tmp_byte = *(pred += pitch);
444    tmp_word |= (tmp_byte << 8);
445    tmp_byte = *(pred += pitch);
446    tmp_word |= (tmp_byte << 16);
447    tmp_byte = *(pred += pitch);
448    tmp_word |= (tmp_byte << 24);
449    *((uint32*)(tmp_ptr += 4)) = tmp_word;
450    *(tmp_ptr += 4) = *(pred += pitch);
451
452    /* now for Cb */
453#ifdef USE_PRED_BLOCK
454    predCb = video->pred + 459;
455    pitch = 12;
456#else
457    pitch >>= 1;
458    predCb -= (7 * pitch - 7);
459#endif
460    tmp_ptr = video->intra_pred_left_cb;
461    tmp_word = video->intra_pred_topleft_cb;
462    tmp_byte = *(predCb);
463    tmp_word |= (tmp_byte << 8);
464    tmp_byte = *(predCb += pitch);
465    tmp_word |= (tmp_byte << 16);
466    tmp_byte = *(predCb += pitch);
467    tmp_word |= (tmp_byte << 24);
468    *((uint32*)tmp_ptr) = tmp_word;
469    tmp_word = *(predCb += pitch);
470    tmp_byte = *(predCb += pitch);
471    tmp_word |= (tmp_byte << 8);
472    tmp_byte = *(predCb += pitch);
473    tmp_word |= (tmp_byte << 16);
474    tmp_byte = *(predCb += pitch);
475    tmp_word |= (tmp_byte << 24);
476    *((uint32*)(tmp_ptr += 4)) = tmp_word;
477    *(tmp_ptr += 4) = *(predCb += pitch);
478
479    /* now for Cr */
480#ifdef USE_PRED_BLOCK
481    predCr = video->pred + 603;
482#else
483    predCr -= (7 * pitch - 7);
484#endif
485    tmp_ptr = video->intra_pred_left_cr;
486    tmp_word = video->intra_pred_topleft_cr;
487    tmp_byte = *(predCr);
488    tmp_word |= (tmp_byte << 8);
489    tmp_byte = *(predCr += pitch);
490    tmp_word |= (tmp_byte << 16);
491    tmp_byte = *(predCr += pitch);
492    tmp_word |= (tmp_byte << 24);
493    *((uint32*)tmp_ptr) = tmp_word;
494    tmp_word = *(predCr += pitch);
495    tmp_byte = *(predCr += pitch);
496    tmp_word |= (tmp_byte << 8);
497    tmp_byte = *(predCr += pitch);
498    tmp_word |= (tmp_byte << 16);
499    tmp_byte = *(predCr += pitch);
500    tmp_word |= (tmp_byte << 24);
501    *((uint32*)(tmp_ptr += 4)) = tmp_word;
502    *(tmp_ptr += 4) = *(predCr += pitch);
503
504    return ;
505}
506#endif /* MB_BASED_DEBLOCK */
507
508AVCStatus Intra_4x4(AVCCommonObj *video, int block_x, int block_y, uint8 *comp)
509{
510    AVCMacroblock *currMB = video->currMB;
511    int block_offset;
512    AVCNeighborAvailability availability;
513    int pitch = video->currPic->pitch;
514
515#ifdef USE_PRED_BLOCK
516    block_offset = (block_y * 80) + (block_x << 2);
517#else
518    block_offset = (block_y << 2) * pitch + (block_x << 2);
519#endif
520
521#ifdef MB_BASED_DEBLOCK
522    /* boundary blocks use video->pred_intra_top, pred_intra_left, pred_intra_topleft */
523    if (!block_x)
524    {
525        video->pintra_pred_left = video->intra_pred_left + 1 + (block_y << 2);
526        pitch = 1;
527    }
528    else
529    {
530        video->pintra_pred_left = video->pred_block + block_offset - 1;
531        pitch = video->pred_pitch;
532    }
533
534    if (!block_y)
535    {
536        video->pintra_pred_top = video->intra_pred_top + (block_x << 2) + (video->mb_x << 4);
537    }
538    else
539    {
540        video->pintra_pred_top = video->pred_block + block_offset - video->pred_pitch;
541    }
542
543    if (!block_x)
544    {
545        video->intra_pred_topleft = video->intra_pred_left[block_y<<2];
546    }
547    else if (!block_y)
548    {
549        video->intra_pred_topleft = video->intra_pred_top[(video->mb_x<<4)+(block_x<<2)-1];
550    }
551    else
552    {
553        video->intra_pred_topleft = video->pred_block[block_offset - video->pred_pitch - 1];
554    }
555
556#else
557    /* normal case */
558    video->pintra_pred_top = comp - pitch;
559    video->pintra_pred_left = comp - 1;
560    if (video->mb_y || block_y)
561    {
562        video->intra_pred_topleft = *(comp - pitch - 1);
563    }
564#endif
565
566    switch (currMB->i4Mode[(block_y << 2) + block_x])
567    {
568        case AVC_I4_Vertical:       /* Intra_4x4_Vertical */
569            if (block_y > 0 || video->intraAvailB)/* to prevent out-of-bound access*/
570            {
571                Intra_4x4_Vertical(video,  block_offset);
572            }
573            else
574            {
575                return AVC_FAIL;
576            }
577            break;
578
579        case AVC_I4_Horizontal:     /* Intra_4x4_Horizontal */
580            if (block_x || video->intraAvailA)  /* to prevent out-of-bound access */
581            {
582                Intra_4x4_Horizontal(video, pitch, block_offset);
583            }
584            else
585            {
586                return AVC_FAIL;
587            }
588            break;
589
590        case AVC_I4_DC:     /* Intra_4x4_DC */
591            availability.left = TRUE;
592            availability.top = TRUE;
593            if (!block_y)
594            { /* check availability up */
595                availability.top = video->intraAvailB ;
596            }
597            if (!block_x)
598            { /* check availability left */
599                availability.left = video->intraAvailA ;
600            }
601            Intra_4x4_DC(video, pitch, block_offset, &availability);
602            break;
603
604        case AVC_I4_Diagonal_Down_Left:     /* Intra_4x4_Diagonal_Down_Left */
605            /* lookup table will be more appropriate for this case  */
606            if (block_y == 0 && !video->intraAvailB)
607            {
608                return AVC_FAIL;
609            }
610
611            availability.top_right = BlkTopRight[(block_y<<2) + block_x];
612
613            if (availability.top_right == 2)
614            {
615                availability.top_right = video->intraAvailB;
616            }
617            else if (availability.top_right == 3)
618            {
619                availability.top_right = video->intraAvailC;
620            }
621
622            Intra_4x4_Down_Left(video, block_offset, &availability);
623            break;
624
625        case AVC_I4_Diagonal_Down_Right:        /* Intra_4x4_Diagonal_Down_Right */
626            if ((block_y && block_x)  /* to prevent out-of-bound access */
627                    || (block_y && video->intraAvailA)
628                    || (block_x && video->intraAvailB)
629                    || (video->intraAvailA && video->intraAvailD && video->intraAvailB))
630            {
631                Intra_4x4_Diagonal_Down_Right(video, pitch, block_offset);
632            }
633            else
634            {
635                return AVC_FAIL;
636            }
637            break;
638
639        case AVC_I4_Vertical_Right:     /* Intra_4x4_Vertical_Right */
640            if ((block_y && block_x)  /* to prevent out-of-bound access */
641                    || (block_y && video->intraAvailA)
642                    || (block_x && video->intraAvailB)
643                    || (video->intraAvailA && video->intraAvailD && video->intraAvailB))
644            {
645                Intra_4x4_Diagonal_Vertical_Right(video, pitch, block_offset);
646            }
647            else
648            {
649                return AVC_FAIL;
650            }
651            break;
652
653        case AVC_I4_Horizontal_Down:        /* Intra_4x4_Horizontal_Down */
654            if ((block_y && block_x)  /* to prevent out-of-bound access */
655                    || (block_y && video->intraAvailA)
656                    || (block_x && video->intraAvailB)
657                    || (video->intraAvailA && video->intraAvailD && video->intraAvailB))
658            {
659                Intra_4x4_Diagonal_Horizontal_Down(video, pitch, block_offset);
660            }
661            else
662            {
663                return AVC_FAIL;
664            }
665            break;
666
667        case AVC_I4_Vertical_Left:      /* Intra_4x4_Vertical_Left */
668            /* lookup table may be more appropriate for this case  */
669            if (block_y == 0 && !video->intraAvailB)
670            {
671                return AVC_FAIL;
672            }
673
674            availability.top_right = BlkTopRight[(block_y<<2) + block_x];
675
676            if (availability.top_right == 2)
677            {
678                availability.top_right = video->intraAvailB;
679            }
680            else if (availability.top_right == 3)
681            {
682                availability.top_right = video->intraAvailC;
683            }
684
685            Intra_4x4_Vertical_Left(video,  block_offset, &availability);
686            break;
687
688        case AVC_I4_Horizontal_Up:      /* Intra_4x4_Horizontal_Up */
689            if (block_x || video->intraAvailA)
690            {
691                Intra_4x4_Horizontal_Up(video, pitch, block_offset);
692            }
693            else
694            {
695                return AVC_FAIL;
696            }
697            break;
698
699
700        default:
701
702            break;
703    }
704
705    return AVC_SUCCESS;
706}
707
708
/* =============================== BEGIN 4x4 MODES ====================================== */
711void Intra_4x4_Vertical(AVCCommonObj *video,  int block_offset)
712{
713    uint8 *comp_ref = video->pintra_pred_top;
714    uint32 temp;
715    uint8 *pred = video->pred_block + block_offset;
716    int pred_pitch = video->pred_pitch;
717
718    /*P = (int) *comp_ref++;
719    Q = (int) *comp_ref++;
720    R = (int) *comp_ref++;
721    S = (int) *comp_ref++;
722    temp = S|(R<<8)|(Q<<16)|(P<<24);*/
723    temp = *((uint32*)comp_ref);
724
725    *((uint32*)pred) =  temp; /* write 4 at a time */
726    pred += pred_pitch;
727    *((uint32*)pred) =  temp;
728    pred += pred_pitch;
729    *((uint32*)pred) =  temp;
730    pred += pred_pitch;
731    *((uint32*)pred) =  temp;
732
733    return ;
734}
735
736void Intra_4x4_Horizontal(AVCCommonObj *video, int pitch, int block_offset)
737{
738    uint8   *comp_ref = video->pintra_pred_left;
739    uint32 temp;
740    int P;
741    uint8 *pred = video->pred_block + block_offset;
742    int pred_pitch = video->pred_pitch;
743
744    P = *comp_ref;
745    temp = P | (P << 8);
746    temp = temp | (temp << 16);
747    *((uint32*)pred) = temp;
748    pred += pred_pitch;
749    comp_ref += pitch;
750    P = *comp_ref;
751    temp = P | (P << 8);
752    temp = temp | (temp << 16);
753    *((uint32*)pred) = temp;
754    pred += pred_pitch;
755    comp_ref += pitch;
756    P = *comp_ref;
757    temp = P | (P << 8);
758    temp = temp | (temp << 16);
759    *((uint32*)pred) = temp;
760    pred += pred_pitch;
761    comp_ref += pitch;
762    P = *comp_ref;
763    temp = P | (P << 8);
764    temp = temp | (temp << 16);
765    *((uint32*)pred) = temp;
766
767    return ;
768}
769
770void Intra_4x4_DC(AVCCommonObj *video, int pitch, int block_offset,
771                  AVCNeighborAvailability *availability)
772{
773    uint8   *comp_ref = video->pintra_pred_left;
774    uint32  temp;
775    int DC;
776    uint8 *pred = video->pred_block + block_offset;
777    int pred_pitch = video->pred_pitch;
778
779    if (availability->left)
780    {
781        DC = *comp_ref;
782        comp_ref += pitch;
783        DC += *comp_ref;
784        comp_ref += pitch;
785        DC += *comp_ref;
786        comp_ref += pitch;
787        DC += *comp_ref;
788        comp_ref = video->pintra_pred_top;
789
790        if (availability->top)
791        {
792            DC = (comp_ref[0] + comp_ref[1] + comp_ref[2] + comp_ref[3] + DC + 4) >> 3;
793        }
794        else
795        {
796            DC = (DC + 2) >> 2;
797
798        }
799    }
800    else if (availability->top)
801    {
802        comp_ref = video->pintra_pred_top;
803        DC = (comp_ref[0] + comp_ref[1] + comp_ref[2] + comp_ref[3] + 2) >> 2;
804
805    }
806    else
807    {
808        DC = 128;
809    }
810
811    temp = DC | (DC << 8);
812    temp = temp | (temp << 16);
813    *((uint32*)pred) = temp;
814    pred += pred_pitch;
815    *((uint32*)pred) = temp;
816    pred += pred_pitch;
817    *((uint32*)pred) = temp;
818    pred += pred_pitch;
819    *((uint32*)pred) = temp;
820
821    return ;
822}
823
824void Intra_4x4_Down_Left(AVCCommonObj *video, int block_offset,
825                         AVCNeighborAvailability *availability)
826{
827    uint8   *comp_refx = video->pintra_pred_top;
828    uint32 temp;
829    int r0, r1, r2, r3, r4, r5, r6, r7;
830    uint8 *pred = video->pred_block + block_offset;
831    int pred_pitch = video->pred_pitch;
832
833    r0 = *comp_refx++;
834    r1 = *comp_refx++;
835    r2 = *comp_refx++;
836    r3 = *comp_refx++;
837    if (availability->top_right)
838    {
839        r4 = *comp_refx++;
840        r5 = *comp_refx++;
841        r6 = *comp_refx++;
842        r7 = *comp_refx++;
843    }
844    else
845    {
846        r4 = r3;
847        r5 = r3;
848        r6 = r3;
849        r7 = r3;
850    }
851
852    r0 += (r1 << 1);
853    r0 += r2;
854    r0 += 2;
855    r0 >>= 2;
856    r1 += (r2 << 1);
857    r1 += r3;
858    r1 += 2;
859    r1 >>= 2;
860    r2 += (r3 << 1);
861    r2 += r4;
862    r2 += 2;
863    r2 >>= 2;
864    r3 += (r4 << 1);
865    r3 += r5;
866    r3 += 2;
867    r3 >>= 2;
868    r4 += (r5 << 1);
869    r4 += r6;
870    r4 += 2;
871    r4 >>= 2;
872    r5 += (r6 << 1);
873    r5 += r7;
874    r5 += 2;
875    r5 >>= 2;
876    r6 += (3 * r7);
877    r6 += 2;
878    r6 >>= 2;
879
880    temp = r0 | (r1 << 8);
881    temp |= (r2 << 16);
882    temp |= (r3 << 24);
883    *((uint32*)pred) = temp;
884    pred += pred_pitch;
885
886    temp = (temp >> 8) | (r4 << 24);
887    *((uint32*)pred) = temp;
888    pred += pred_pitch;
889
890    temp = (temp >> 8) | (r5 << 24);
891    *((uint32*)pred) = temp;
892    pred += pred_pitch;
893
894    temp = (temp >> 8) | (r6 << 24);
895    *((uint32*)pred) = temp;
896
897    return ;
898}
899
900void Intra_4x4_Diagonal_Down_Right(AVCCommonObj *video, int pitch, int
901                                   block_offset)
902{
903    uint8 *comp_refx = video->pintra_pred_top;
904    uint8 *comp_refy = video->pintra_pred_left;
905    uint32 temp;
906    int P_x, Q_x, R_x, P_y, Q_y, R_y, D;
907    int x0, x1, x2;
908    uint8 *pred = video->pred_block + block_offset;
909    int pred_pitch = video->pred_pitch;
910
911    temp = *((uint32*)comp_refx); /* read 4 bytes */
912    x0 = temp & 0xFF;
913    x1 = (temp >> 8) & 0xFF;
914    x2 = (temp >> 16) & 0xFF;
915
916    Q_x = (x0 + 2 * x1 + x2 + 2) >> 2;
917    R_x = (x1 + 2 * x2 + (temp >> 24) + 2) >> 2;
918
919    x2 = video->intra_pred_topleft; /* re-use x2 instead of y0 */
920    P_x = (x2 + 2 * x0 + x1 + 2) >> 2;
921
922    x1 = *comp_refy;
923    comp_refy += pitch; /* re-use x1 instead of y1 */
924    D = (x0 + 2 * x2 + x1 + 2) >> 2;
925
926    x0 = *comp_refy;
927    comp_refy += pitch; /* re-use x0 instead of y2 */
928    P_y = (x2 + 2 * x1 + x0 + 2) >> 2;
929
930    x2 = *comp_refy;
931    comp_refy += pitch; /* re-use x2 instead of y3 */
932    Q_y = (x1 + 2 * x0 + x2 + 2) >> 2;
933
934    x1 = *comp_refy;                    /* re-use x1 instead of y4 */
935    R_y = (x0 + 2 * x2 + x1 + 2) >> 2;
936
937    /* we can pack these  */
938    temp =  D | (P_x << 8);   //[D   P_x Q_x R_x]
939    //[P_y D   P_x Q_x]
940    temp |= (Q_x << 16); //[Q_y P_y D   P_x]
941    temp |= (R_x << 24);  //[R_y Q_y P_y D  ]
942    *((uint32*)pred) = temp;
943    pred += pred_pitch;
944
945    temp =  P_y | (D << 8);
946    temp |= (P_x << 16);
947    temp |= (Q_x << 24);
948    *((uint32*)pred) = temp;
949    pred += pred_pitch;
950
951    temp =  Q_y | (P_y << 8);
952    temp |= (D << 16);
953    temp |= (P_x << 24);
954    *((uint32*)pred) = temp;
955    pred += pred_pitch;
956
957    temp = R_y | (Q_y << 8);
958    temp |= (P_y << 16);
959    temp |= (D << 24);
960    *((uint32*)pred) = temp;
961
962    return ;
963}
964
965void    Intra_4x4_Diagonal_Vertical_Right(AVCCommonObj *video, int pitch, int block_offset)
966{
967    uint8   *comp_refx = video->pintra_pred_top;
968    uint8   *comp_refy = video->pintra_pred_left;
969    uint32 temp;
970    int P0, Q0, R0, S0, P1, Q1, R1, P2, Q2, D;
971    int x0, x1, x2;
972    uint8 *pred = video->pred_block + block_offset;
973    int pred_pitch = video->pred_pitch;
974
975    x0 = *comp_refx++;
976    x1 = *comp_refx++;
977    Q0 = x0 + x1 + 1;
978
979    x2 = *comp_refx++;
980    R0 = x1 + x2 + 1;
981
982    x1 = *comp_refx++; /* reuse x1 instead of x3 */
983    S0 = x2 + x1 + 1;
984
985    x1 = video->intra_pred_topleft; /* reuse x1 instead of y0 */
986    P0 = x1 + x0 + 1;
987
988    x2 = *comp_refy;
989    comp_refy += pitch; /* reuse x2 instead of y1 */
990    D = (x2 + 2 * x1 + x0 + 2) >> 2;
991
992    P1 = (P0 + Q0) >> 2;
993    Q1 = (Q0 + R0) >> 2;
994    R1 = (R0 + S0) >> 2;
995
996    P0 >>= 1;
997    Q0 >>= 1;
998    R0 >>= 1;
999    S0 >>= 1;
1000
1001    x0 = *comp_refy;
1002    comp_refy += pitch; /* reuse x0 instead of y2 */
1003    P2 = (x1 + 2 * x2 + x0 + 2) >> 2;
1004    x1 = *comp_refy;
1005    comp_refy += pitch; /* reuse x1 instead of y3 */
1006    Q2 = (x2 + 2 * x0 + x1 + 2) >> 2;
1007
1008    temp =  P0 | (Q0 << 8);  //[P0 Q0 R0 S0]
1009    //[D  P1 Q1 R1]
1010    temp |= (R0 << 16); //[P2 P0 Q0 R0]
1011    temp |= (S0 << 24); //[Q2 D  P1 Q1]
1012    *((uint32*)pred) =  temp;
1013    pred += pred_pitch;
1014
1015    temp =  D | (P1 << 8);
1016    temp |= (Q1 << 16);
1017    temp |= (R1 << 24);
1018    *((uint32*)pred) =  temp;
1019    pred += pred_pitch;
1020
1021    temp = P2 | (P0 << 8);
1022    temp |= (Q0 << 16);
1023    temp |= (R0 << 24);
1024    *((uint32*)pred) =  temp;
1025    pred += pred_pitch;
1026
1027    temp = Q2 | (D << 8);
1028    temp |= (P1 << 16);
1029    temp |= (Q1 << 24);
1030    *((uint32*)pred) =  temp;
1031
1032    return ;
1033}
1034
1035void Intra_4x4_Diagonal_Horizontal_Down(AVCCommonObj *video, int pitch,
1036                                        int block_offset)
1037{
1038    uint8   *comp_refx = video->pintra_pred_top;
1039    uint8   *comp_refy = video->pintra_pred_left;
1040    uint32 temp;
1041    int P0, Q0, R0, S0, P1, Q1, R1, P2, Q2, D;
1042    int x0, x1, x2;
1043    uint8 *pred = video->pred_block + block_offset;
1044    int pred_pitch = video->pred_pitch;
1045
1046    x0 = *comp_refx++;
1047    x1 = *comp_refx++;
1048    x2 = *comp_refx++;
1049    Q2 = (x0 + 2 * x1 + x2 + 2) >> 2;
1050
1051    x2 = video->intra_pred_topleft; /* reuse x2 instead of y0 */
1052    P2 = (x2 + 2 * x0 + x1 + 2) >> 2;
1053
1054    x1 = *comp_refy;
1055    comp_refy += pitch; /* reuse x1 instead of y1 */
1056    D = (x1 + 2 * x2 + x0 + 2) >> 2;
1057    P0 = x2 + x1 + 1;
1058
1059    x0 = *comp_refy;
1060    comp_refy += pitch; /* reuse x0 instead of y2 */
1061    Q0 = x1 + x0 + 1;
1062
1063    x1 = *comp_refy;
1064    comp_refy += pitch; /* reuse x1 instead of y3 */
1065    R0 = x0 + x1 + 1;
1066
1067    x2 = *comp_refy;    /* reuse x2 instead of y4 */
1068    S0 = x1 + x2 + 1;
1069
1070    P1 = (P0 + Q0) >> 2;
1071    Q1 = (Q0 + R0) >> 2;
1072    R1 = (R0 + S0) >> 2;
1073
1074    P0 >>= 1;
1075    Q0 >>= 1;
1076    R0 >>= 1;
1077    S0 >>= 1;
1078
1079
1080    /* we can pack these  */
1081    temp = P0 | (D << 8);   //[P0 D  P2 Q2]
1082    //[Q0 P1 P0 D ]
1083    temp |= (P2 << 16);  //[R0 Q1 Q0 P1]
1084    temp |= (Q2 << 24); //[S0 R1 R0 Q1]
1085    *((uint32*)pred) = temp;
1086    pred += pred_pitch;
1087
1088    temp = Q0 | (P1 << 8);
1089    temp |= (P0 << 16);
1090    temp |= (D << 24);
1091    *((uint32*)pred) = temp;
1092    pred += pred_pitch;
1093
1094    temp = R0 | (Q1 << 8);
1095    temp |= (Q0 << 16);
1096    temp |= (P1 << 24);
1097    *((uint32*)pred) = temp;
1098    pred += pred_pitch;
1099
1100    temp = S0 | (R1 << 8);
1101    temp |= (R0 << 16);
1102    temp |= (Q1 << 24);
1103    *((uint32*)pred) = temp;
1104
1105    return ;
1106}
1107
1108void Intra_4x4_Vertical_Left(AVCCommonObj *video, int block_offset, AVCNeighborAvailability *availability)
1109{
1110    uint8   *comp_refx = video->pintra_pred_top;
1111    uint32 temp1, temp2;
1112    int x0, x1, x2, x3, x4, x5, x6;
1113    uint8 *pred = video->pred_block + block_offset;
1114    int pred_pitch = video->pred_pitch;
1115
1116    x0 = *comp_refx++;
1117    x1 = *comp_refx++;
1118    x2 = *comp_refx++;
1119    x3 = *comp_refx++;
1120    if (availability->top_right)
1121    {
1122        x4 = *comp_refx++;
1123        x5 = *comp_refx++;
1124        x6 = *comp_refx++;
1125    }
1126    else
1127    {
1128        x4 = x3;
1129        x5 = x3;
1130        x6 = x3;
1131    }
1132
1133    x0 += x1 + 1;
1134    x1 += x2 + 1;
1135    x2 += x3 + 1;
1136    x3 += x4 + 1;
1137    x4 += x5 + 1;
1138    x5 += x6 + 1;
1139
1140    temp1 = (x0 >> 1);
1141    temp1 |= ((x1 >> 1) << 8);
1142    temp1 |= ((x2 >> 1) << 16);
1143    temp1 |= ((x3 >> 1) << 24);
1144
1145    *((uint32*)pred) = temp1;
1146    pred += pred_pitch;
1147
1148    temp2 = ((x0 + x1) >> 2);
1149    temp2 |= (((x1 + x2) >> 2) << 8);
1150    temp2 |= (((x2 + x3) >> 2) << 16);
1151    temp2 |= (((x3 + x4) >> 2) << 24);
1152
1153    *((uint32*)pred) = temp2;
1154    pred += pred_pitch;
1155
1156    temp1 = (temp1 >> 8) | ((x4 >> 1) << 24);   /* rotate out old value */
1157    *((uint32*)pred) = temp1;
1158    pred += pred_pitch;
1159
1160    temp2 = (temp2 >> 8) | (((x4 + x5) >> 2) << 24); /* rotate out old value */
1161    *((uint32*)pred) = temp2;
1162    pred += pred_pitch;
1163
1164    return ;
1165}
1166
1167void Intra_4x4_Horizontal_Up(AVCCommonObj *video, int pitch, int block_offset)
1168{
1169    uint8   *comp_refy = video->pintra_pred_left;
1170    uint32 temp;
1171    int Q0, R0, Q1, D0, D1, P0, P1;
1172    int y0, y1, y2, y3;
1173    uint8 *pred = video->pred_block + block_offset;
1174    int pred_pitch = video->pred_pitch;
1175
1176    y0 = *comp_refy;
1177    comp_refy += pitch;
1178    y1 = *comp_refy;
1179    comp_refy += pitch;
1180    y2 = *comp_refy;
1181    comp_refy += pitch;
1182    y3 = *comp_refy;
1183
1184    Q0 = (y1 + y2 + 1) >> 1;
1185    Q1 = (y1 + (y2 << 1) + y3 + 2) >> 2;
1186    P0 = ((y0 + y1 + 1) >> 1);
1187    P1 = ((y0 + (y1 << 1) + y2 + 2) >> 2);
1188
1189    temp = P0 | (P1 << 8);      // [P0 P1 Q0 Q1]
1190    temp |= (Q0 << 16);     // [Q0 Q1 R0 DO]
1191    temp |= (Q1 << 24);     // [R0 D0 D1 D1]
1192    *((uint32*)pred) = temp;      // [D1 D1 D1 D1]
1193    pred += pred_pitch;
1194
1195    D0 = (y2 + 3 * y3 + 2) >> 2;
1196    R0 = (y2 + y3 + 1) >> 1;
1197
1198    temp = Q0 | (Q1 << 8);
1199    temp |= (R0 << 16);
1200    temp |= (D0 << 24);
1201    *((uint32*)pred) = temp;
1202    pred += pred_pitch;
1203
1204    D1 = y3;
1205
1206    temp = R0 | (D0 << 8);
1207    temp |= (D1 << 16);
1208    temp |= (D1 << 24);
1209    *((uint32*)pred) = temp;
1210    pred += pred_pitch;
1211
1212    temp = D1 | (D1 << 8);
1213    temp |= (temp << 16);
1214    *((uint32*)pred) = temp;
1215
1216    return ;
1217}
1218/* =============================== END 4x4 MODES======================================*/
1219void  Intra_16x16_Vertical(AVCCommonObj *video)
1220{
1221    int i;
1222    uint32 temp1, temp2, temp3, temp4;
1223    uint8   *comp_ref = video->pintra_pred_top;
1224    uint8 *pred = video->pred_block;
1225    int pred_pitch = video->pred_pitch;
1226
1227    temp1 = *((uint32*)comp_ref);
1228    comp_ref += 4;
1229
1230    temp2 = *((uint32*)comp_ref);
1231    comp_ref += 4;
1232
1233    temp3 = *((uint32*)comp_ref);
1234    comp_ref += 4;
1235
1236    temp4 = *((uint32*)comp_ref);
1237    comp_ref += 4;
1238
1239    i = 16;
1240    while (i > 0)
1241    {
1242        *((uint32*)pred) = temp1;
1243        *((uint32*)(pred + 4)) = temp2;
1244        *((uint32*)(pred + 8)) = temp3;
1245        *((uint32*)(pred + 12)) = temp4;
1246        pred += pred_pitch;
1247        i--;
1248    }
1249
1250    return ;
1251}
1252
1253void Intra_16x16_Horizontal(AVCCommonObj *video, int pitch)
1254{
1255    int i;
1256    uint32 temp;
1257    uint8 *comp_ref = video->pintra_pred_left;
1258    uint8 *pred = video->pred_block;
1259    int pred_pitch = video->pred_pitch;
1260
1261    for (i = 0; i < 16; i++)
1262    {
1263        temp = *comp_ref;
1264        temp |= (temp << 8);
1265        temp |= (temp << 16);
1266        *((uint32*)pred) = temp;
1267        *((uint32*)(pred + 4)) = temp;
1268        *((uint32*)(pred + 8)) = temp;
1269        *((uint32*)(pred + 12)) = temp;
1270        pred += pred_pitch;
1271        comp_ref += pitch;
1272    }
1273}
1274
1275
1276void  Intra_16x16_DC(AVCCommonObj *video, int pitch)
1277{
1278    int i;
1279    uint32 temp, temp2;
1280    uint8 *comp_ref_x = video->pintra_pred_top;
1281    uint8 *comp_ref_y = video->pintra_pred_left;
1282    int sum = 0;
1283    uint8 *pred = video->pred_block;
1284    int pred_pitch = video->pred_pitch;
1285
1286    if (video->intraAvailB)
1287    {
1288        temp = *((uint32*)comp_ref_x);
1289        comp_ref_x += 4;
1290        temp2 = (temp >> 8) & 0xFF00FF;
1291        temp &= 0xFF00FF;
1292        temp += temp2;
1293        sum = temp + (temp >> 16);
1294        temp = *((uint32*)comp_ref_x);
1295        comp_ref_x += 4;
1296        temp2 = (temp >> 8) & 0xFF00FF;
1297        temp &= 0xFF00FF;
1298        temp += temp2;
1299        sum += temp + (temp >> 16);
1300        temp = *((uint32*)comp_ref_x);
1301        comp_ref_x += 4;
1302        temp2 = (temp >> 8) & 0xFF00FF;
1303        temp &= 0xFF00FF;
1304        temp += temp2;
1305        sum += temp + (temp >> 16);
1306        temp = *((uint32*)comp_ref_x);
1307        comp_ref_x += 4;
1308        temp2 = (temp >> 8) & 0xFF00FF;
1309        temp &= 0xFF00FF;
1310        temp += temp2;
1311        sum += temp + (temp >> 16);
1312        sum &= 0xFFFF;
1313
1314        if (video->intraAvailA)
1315        {
1316            for (i = 0; i < 16; i++)
1317            {
1318                sum += (*comp_ref_y);
1319                comp_ref_y += pitch;
1320            }
1321            sum = (sum + 16) >> 5;
1322        }
1323        else
1324        {
1325            sum = (sum + 8) >> 4;
1326        }
1327    }
1328    else if (video->intraAvailA)
1329    {
1330        for (i = 0; i < 16; i++)
1331        {
1332            sum += *comp_ref_y;
1333            comp_ref_y += pitch;
1334        }
1335        sum = (sum + 8) >> 4;
1336    }
1337    else
1338    {
1339        sum = 128;
1340    }
1341
1342    temp = sum | (sum << 8);
1343    temp |= (temp << 16);
1344
1345    for (i = 0; i < 16; i++)
1346    {
1347        *((uint32*)pred) = temp;
1348        *((uint32*)(pred + 4)) = temp;
1349        *((uint32*)(pred + 8)) = temp;
1350        *((uint32*)(pred + 12)) = temp;
1351        pred += pred_pitch;
1352    }
1353
1354}
1355
1356void Intra_16x16_Plane(AVCCommonObj *video, int pitch)
1357{
1358    int i, a_16, b, c, factor_c;
1359    uint8 *comp_ref_x = video->pintra_pred_top;
1360    uint8 *comp_ref_y = video->pintra_pred_left;
1361    uint8 *comp_ref_x0, *comp_ref_x1, *comp_ref_y0, *comp_ref_y1;
1362    int H = 0, V = 0 , tmp;
1363    uint8 *pred = video->pred_block;
1364    uint32 temp;
1365    uint8 byte1, byte2, byte3;
1366    int value;
1367    int pred_pitch = video->pred_pitch;
1368
1369    comp_ref_x0 = comp_ref_x + 8;
1370    comp_ref_x1 = comp_ref_x + 6;
1371    comp_ref_y0 = comp_ref_y + (pitch << 3);
1372    comp_ref_y1 = comp_ref_y + 6 * pitch;
1373
1374    for (i = 1; i < 8; i++)
1375    {
1376        H += i * (*comp_ref_x0++ - *comp_ref_x1--);
1377        V += i * (*comp_ref_y0 - *comp_ref_y1);
1378        comp_ref_y0 += pitch;
1379        comp_ref_y1 -= pitch;
1380    }
1381
1382    H += i * (*comp_ref_x0++ - video->intra_pred_topleft);
1383    V += i * (*comp_ref_y0 - *comp_ref_y1);
1384
1385
1386    a_16 = ((*(comp_ref_x + 15) + *(comp_ref_y + 15 * pitch)) << 4) + 16;;
1387    b = (5 * H + 32) >> 6;
1388    c = (5 * V + 32) >> 6;
1389
1390    tmp = 0;
1391
1392    for (i = 0; i < 16; i++)
1393    {
1394        factor_c = a_16 + c * (tmp++ - 7);
1395
1396        factor_c -= 7 * b;
1397
1398        value = factor_c >> 5;
1399        factor_c += b;
1400        CLIP_RESULT(value)
1401        byte1 = value;
1402        value = factor_c >> 5;
1403        factor_c += b;
1404        CLIP_RESULT(value)
1405        byte2 = value;
1406        value = factor_c >> 5;
1407        factor_c += b;
1408        CLIP_RESULT(value)
1409        byte3 = value;
1410        value = factor_c >> 5;
1411        factor_c += b;
1412        CLIP_RESULT(value)
1413        temp = byte1 | (byte2 << 8);
1414        temp |= (byte3 << 16);
1415        temp |= (value << 24);
1416        *((uint32*)pred) = temp;
1417
1418        value = factor_c >> 5;
1419        factor_c += b;
1420        CLIP_RESULT(value)
1421        byte1 = value;
1422        value = factor_c >> 5;
1423        factor_c += b;
1424        CLIP_RESULT(value)
1425        byte2 = value;
1426        value = factor_c >> 5;
1427        factor_c += b;
1428        CLIP_RESULT(value)
1429        byte3 = value;
1430        value = factor_c >> 5;
1431        factor_c += b;
1432        CLIP_RESULT(value)
1433        temp = byte1 | (byte2 << 8);
1434        temp |= (byte3 << 16);
1435        temp |= (value << 24);
1436        *((uint32*)(pred + 4)) = temp;
1437
1438        value = factor_c >> 5;
1439        factor_c += b;
1440        CLIP_RESULT(value)
1441        byte1 = value;
1442        value = factor_c >> 5;
1443        factor_c += b;
1444        CLIP_RESULT(value)
1445        byte2 = value;
1446        value = factor_c >> 5;
1447        factor_c += b;
1448        CLIP_RESULT(value)
1449        byte3 = value;
1450        value = factor_c >> 5;
1451        factor_c += b;
1452        CLIP_RESULT(value)
1453        temp = byte1 | (byte2 << 8);
1454        temp |= (byte3 << 16);
1455        temp |= (value << 24);
1456        *((uint32*)(pred + 8)) = temp;
1457
1458        value = factor_c >> 5;
1459        factor_c += b;
1460        CLIP_RESULT(value)
1461        byte1 = value;
1462        value = factor_c >> 5;
1463        factor_c += b;
1464        CLIP_RESULT(value)
1465        byte2 = value;
1466        value = factor_c >> 5;
1467        factor_c += b;
1468        CLIP_RESULT(value)
1469        byte3 = value;
1470        value = factor_c >> 5;
1471        CLIP_RESULT(value)
1472        temp = byte1 | (byte2 << 8);
1473        temp |= (byte3 << 16);
1474        temp |= (value << 24);
1475        *((uint32*)(pred + 12)) = temp;
1476        pred += pred_pitch;
1477    }
1478}
1479
1480/************** Chroma intra prediction *********************/
1481
1482void Intra_Chroma_DC(AVCCommonObj *video, int pitch, uint8 *predCb, uint8 *predCr)
1483{
1484    int i;
1485    uint32 temp, temp2, pred_a, pred_b;
1486    uint8 *comp_ref_x, *comp_ref_y;
1487    uint8 *comp_ref_cb_x = video->pintra_pred_top_cb;
1488    uint8 *comp_ref_cb_y = video->pintra_pred_left_cb;
1489    uint8 *comp_ref_cr_x = video->pintra_pred_top_cr;
1490    uint8 *comp_ref_cr_y = video->pintra_pred_left_cr;
1491    int  component, j;
1492    int  sum_x0, sum_x1, sum_y0, sum_y1;
1493    int pred_0[2], pred_1[2], pred_2[2], pred_3[2];
1494    int pred_pitch = video->pred_pitch;
1495    uint8 *pred;
1496
1497    if (video->intraAvailB & video->intraAvailA)
1498    {
1499        comp_ref_x = comp_ref_cb_x;
1500        comp_ref_y = comp_ref_cb_y;
1501        for (i = 0; i < 2; i++)
1502        {
1503            temp = *((uint32*)comp_ref_x);
1504            comp_ref_x += 4;
1505            temp2 = (temp >> 8) & 0xFF00FF;
1506            temp &= 0xFF00FF;
1507            temp += temp2;
1508            temp += (temp >> 16);
1509            sum_x0 = temp & 0xFFFF;
1510
1511            temp = *((uint32*)comp_ref_x);
1512            temp2 = (temp >> 8) & 0xFF00FF;
1513            temp &= 0xFF00FF;
1514            temp += temp2;
1515            temp += (temp >> 16);
1516            sum_x1 = temp & 0xFFFF;
1517
1518            pred_1[i] = (sum_x1 + 2) >> 2;
1519
1520            sum_y0 = *comp_ref_y;
1521            sum_y0 += *(comp_ref_y += pitch);
1522            sum_y0 += *(comp_ref_y += pitch);
1523            sum_y0 += *(comp_ref_y += pitch);
1524
1525            sum_y1 = *(comp_ref_y += pitch);
1526            sum_y1 += *(comp_ref_y += pitch);
1527            sum_y1 += *(comp_ref_y += pitch);
1528            sum_y1 += *(comp_ref_y += pitch);
1529
1530            pred_2[i] = (sum_y1 + 2) >> 2;
1531
1532            pred_0[i] = (sum_y0 + sum_x0 + 4) >> 3;
1533            pred_3[i] = (sum_y1 + sum_x1 + 4) >> 3;
1534
1535            comp_ref_x = comp_ref_cr_x;
1536            comp_ref_y = comp_ref_cr_y;
1537        }
1538    }
1539
1540    else if (video->intraAvailA)
1541    {
1542        comp_ref_y = comp_ref_cb_y;
1543        for (i = 0; i < 2; i++)
1544        {
1545            sum_y0 = *comp_ref_y;
1546            sum_y0 += *(comp_ref_y += pitch);
1547            sum_y0 += *(comp_ref_y += pitch);
1548            sum_y0 += *(comp_ref_y += pitch);
1549
1550            sum_y1 = *(comp_ref_y += pitch);
1551            sum_y1 += *(comp_ref_y += pitch);
1552            sum_y1 += *(comp_ref_y += pitch);
1553            sum_y1 += *(comp_ref_y += pitch);
1554
1555            pred_0[i] = pred_1[i] = (sum_y0 + 2) >> 2;
1556            pred_2[i] = pred_3[i] = (sum_y1 + 2) >> 2;
1557            comp_ref_y = comp_ref_cr_y;
1558        }
1559    }
1560    else if (video->intraAvailB)
1561    {
1562        comp_ref_x = comp_ref_cb_x;
1563        for (i = 0; i < 2; i++)
1564        {
1565            temp = *((uint32*)comp_ref_x);
1566            comp_ref_x += 4;
1567            temp2 = (temp >> 8) & 0xFF00FF;
1568            temp &= 0xFF00FF;
1569            temp += temp2;
1570            temp += (temp >> 16);
1571            sum_x0 = temp & 0xFFFF;
1572
1573            temp = *((uint32*)comp_ref_x);
1574            temp2 = (temp >> 8) & 0xFF00FF;
1575            temp &= 0xFF00FF;
1576            temp += temp2;
1577            temp += (temp >> 16);
1578            sum_x1 = temp & 0xFFFF;
1579
1580            pred_0[i] = pred_2[i] = (sum_x0 + 2) >> 2;
1581            pred_1[i] = pred_3[i] = (sum_x1 + 2) >> 2;
1582            comp_ref_x = comp_ref_cr_x;
1583        }
1584    }
1585    else
1586    {
1587        pred_0[0] = pred_0[1] = pred_1[0] = pred_1[1] =
1588                                                pred_2[0] = pred_2[1] = pred_3[0] = pred_3[1] = 128;
1589    }
1590
1591    pred = predCb;
1592    for (component = 0; component < 2; component++)
1593    {
1594        pred_a = pred_0[component];
1595        pred_b = pred_1[component];
1596        pred_a |= (pred_a << 8);
1597        pred_a |= (pred_a << 16);
1598        pred_b |= (pred_b << 8);
1599        pred_b |= (pred_b << 16);
1600
1601        for (i = 4; i < 6; i++)
1602        {
1603            for (j = 0; j < 4; j++) /* 4 lines */
1604            {
1605                *((uint32*)pred) = pred_a;
1606                *((uint32*)(pred + 4)) = pred_b;
1607                pred += pred_pitch; /* move to the next line */
1608            }
1609            pred_a = pred_2[component];
1610            pred_b = pred_3[component];
1611            pred_a |= (pred_a << 8);
1612            pred_a |= (pred_a << 16);
1613            pred_b |= (pred_b << 8);
1614            pred_b |= (pred_b << 16);
1615        }
1616        pred = predCr; /* point to cr */
1617    }
1618}
1619
1620void  Intra_Chroma_Horizontal(AVCCommonObj *video, int pitch, uint8 *predCb, uint8 *predCr)
1621{
1622    int i;
1623    uint32 temp;
1624    uint8   *comp_ref_cb_y = video->pintra_pred_left_cb;
1625    uint8   *comp_ref_cr_y = video->pintra_pred_left_cr;
1626    uint8  *comp;
1627    int component, j;
1628    int     pred_pitch = video->pred_pitch;
1629    uint8   *pred;
1630
1631    comp = comp_ref_cb_y;
1632    pred = predCb;
1633    for (component = 0; component < 2; component++)
1634    {
1635        for (i = 4; i < 6; i++)
1636        {
1637            for (j = 0; j < 4; j++)
1638            {
1639                temp = *comp;
1640                comp += pitch;
1641                temp |= (temp << 8);
1642                temp |= (temp << 16);
1643                *((uint32*)pred) = temp;
1644                *((uint32*)(pred + 4)) = temp;
1645                pred += pred_pitch;
1646            }
1647        }
1648        comp = comp_ref_cr_y;
1649        pred = predCr; /* point to cr */
1650    }
1651
1652}
1653
1654void  Intra_Chroma_Vertical(AVCCommonObj *video, uint8 *predCb, uint8 *predCr)
1655{
1656    uint32  temp1, temp2;
1657    uint8   *comp_ref_cb_x = video->pintra_pred_top_cb;
1658    uint8   *comp_ref_cr_x = video->pintra_pred_top_cr;
1659    uint8   *comp_ref;
1660    int     component, j;
1661    int     pred_pitch = video->pred_pitch;
1662    uint8   *pred;
1663
1664    comp_ref = comp_ref_cb_x;
1665    pred = predCb;
1666    for (component = 0; component < 2; component++)
1667    {
1668        temp1 = *((uint32*)comp_ref);
1669        temp2 = *((uint32*)(comp_ref + 4));
1670        for (j = 0; j < 8; j++)
1671        {
1672            *((uint32*)pred) = temp1;
1673            *((uint32*)(pred + 4)) = temp2;
1674            pred += pred_pitch;
1675        }
1676        comp_ref = comp_ref_cr_x;
1677        pred = predCr; /* point to cr */
1678    }
1679
1680}
1681
1682void  Intra_Chroma_Plane(AVCCommonObj *video, int pitch, uint8 *predCb, uint8 *predCr)
1683{
1684    int i;
1685    int a_16_C[2], b_C[2], c_C[2], a_16, b, c, factor_c;
1686    uint8 *comp_ref_x, *comp_ref_y, *comp_ref_x0, *comp_ref_x1,  *comp_ref_y0, *comp_ref_y1;
1687    int component, j;
1688    int H, V, tmp;
1689    uint32 temp;
1690    uint8 byte1, byte2, byte3;
1691    int value;
1692    uint8 topleft;
1693    int pred_pitch = video->pred_pitch;
1694    uint8 *pred;
1695
1696    comp_ref_x = video->pintra_pred_top_cb;
1697    comp_ref_y = video->pintra_pred_left_cb;
1698    topleft = video->intra_pred_topleft_cb;
1699
1700    for (component = 0; component < 2; component++)
1701    {
1702        H = V = 0;
1703        comp_ref_x0 = comp_ref_x + 4;
1704        comp_ref_x1 = comp_ref_x + 2;
1705        comp_ref_y0 = comp_ref_y + (pitch << 2);
1706        comp_ref_y1 = comp_ref_y + (pitch << 1);
1707        for (i = 1; i < 4; i++)
1708        {
1709            H += i * (*comp_ref_x0++ - *comp_ref_x1--);
1710            V += i * (*comp_ref_y0 - *comp_ref_y1);
1711            comp_ref_y0 += pitch;
1712            comp_ref_y1 -= pitch;
1713        }
1714        H += i * (*comp_ref_x0++ - topleft);
1715        V += i * (*comp_ref_y0 - *comp_ref_y1);
1716
1717        a_16_C[component] = ((*(comp_ref_x + 7) + *(comp_ref_y + 7 * pitch)) << 4) + 16;
1718        b_C[component] = (17 * H + 16) >> 5;
1719        c_C[component] = (17 * V + 16) >> 5;
1720
1721        comp_ref_x = video->pintra_pred_top_cr;
1722        comp_ref_y = video->pintra_pred_left_cr;
1723        topleft = video->intra_pred_topleft_cr;
1724    }
1725
1726    pred = predCb;
1727    for (component = 0; component < 2; component++)
1728    {
1729        a_16 = a_16_C[component];
1730        b = b_C[component];
1731        c = c_C[component];
1732        tmp = 0;
1733        for (i = 4; i < 6; i++)
1734        {
1735            for (j = 0; j < 4; j++)
1736            {
1737                factor_c = a_16 + c * (tmp++ - 3);
1738
1739                factor_c -= 3 * b;
1740
1741                value = factor_c >> 5;
1742                factor_c += b;
1743                CLIP_RESULT(value)
1744                byte1 = value;
1745                value = factor_c >> 5;
1746                factor_c += b;
1747                CLIP_RESULT(value)
1748                byte2 = value;
1749                value = factor_c >> 5;
1750                factor_c += b;
1751                CLIP_RESULT(value)
1752                byte3 = value;
1753                value = factor_c >> 5;
1754                factor_c += b;
1755                CLIP_RESULT(value)
1756                temp = byte1 | (byte2 << 8);
1757                temp |= (byte3 << 16);
1758                temp |= (value << 24);
1759                *((uint32*)pred) = temp;
1760
1761                value = factor_c >> 5;
1762                factor_c += b;
1763                CLIP_RESULT(value)
1764                byte1 = value;
1765                value = factor_c >> 5;
1766                factor_c += b;
1767                CLIP_RESULT(value)
1768                byte2 = value;
1769                value = factor_c >> 5;
1770                factor_c += b;
1771                CLIP_RESULT(value)
1772                byte3 = value;
1773                value = factor_c >> 5;
1774                factor_c += b;
1775                CLIP_RESULT(value)
1776                temp = byte1 | (byte2 << 8);
1777                temp |= (byte3 << 16);
1778                temp |= (value << 24);
1779                *((uint32*)(pred + 4)) = temp;
1780                pred += pred_pitch;
1781            }
1782        }
1783        pred = predCr; /* point to cr */
1784    }
1785}
1786
1787