motion_est.cpp revision 29a84457aed4c45bc900998b5e11c03023264208
1/* ------------------------------------------------------------------
2 * Copyright (C) 1998-2009 PacketVideo
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
13 * express or implied.
14 * See the License for the specific language governing permissions
15 * and limitations under the License.
16 * -------------------------------------------------------------------
17 */
18#include "avcenc_lib.h"
19
/* Minimum size of GOP (dated 1/23/01, flagged as still needing testing).
   AVCMotionEstimation compares sliceHdr->frame_num against this value when
   deciding whether to run the scene-change-detection pass. */
#define MIN_GOP     1

/* Reference index used by the motion search: always the first frame in the reflist. */
#define DEFAULT_REF_IDX     0

/* Any number greater than 5 will work. */
#define ALL_CAND_EQUAL  10


/* Bias constants taken from TMN 3.2 */
#define PREF_NULL_VEC 129   /* zero vector bias */
#define PREF_16_VEC 129     /* 1MV bias versus 4MVs */
#define PREF_INTRA  3024    /* bias for INTRA coding; NOTE(review): the old trailing "512" looks like an earlier value -- confirm */
31
/* 0/1 flag table indexed as [last_loc][curr_loc]; it is consulted to decide
   whether to continue or compute. */
static const int tab_exclude[9][9] =
{
    /* curr_loc:  0  1  2  3  4  5  6  7  8 */
    /* last 0 */ {0, 0, 0, 0, 0, 0, 0, 0, 0},
    /* last 1 */ {0, 0, 0, 0, 1, 1, 1, 0, 0},
    /* last 2 */ {0, 0, 0, 0, 1, 1, 1, 1, 1},
    /* last 3 */ {0, 0, 0, 0, 0, 0, 1, 1, 1},
    /* last 4 */ {0, 1, 1, 0, 0, 0, 1, 1, 1},
    /* last 5 */ {0, 1, 1, 0, 0, 0, 0, 0, 1},
    /* last 6 */ {0, 1, 1, 1, 1, 0, 0, 0, 1},
    /* last 7 */ {0, 0, 1, 1, 1, 0, 0, 0, 0},
    /* last 8 */ {0, 0, 1, 1, 1, 1, 1, 0, 0}
};
44
/* Pair of increments for each curr_k, indexed as [curr_k][increment]. */
static const int refine_next[8][2] =
{
    {  0,  0 },
    {  2,  0 },
    {  1,  1 },
    {  0,  2 },
    { -1,  1 },
    { -2,  0 },
    { -1, -1 },
    {  0, -2 }
};
49
#ifdef _SAD_STAT
/* Global counters that exist only when _SAD_STAT is defined.
   NOTE(review): presumably the number of macroblocks and of SAD candidates
   evaluated; neither is updated in this part of the file -- confirm against
   the code that increments them. */
uint32 num_MB = 0;
uint32 num_cand = 0;
#endif
54
/************************************************************************/
#define TH_INTER_2  100  /* temporary for now */

/* Uncommenting the next line selects the fixed-mode AVCMBMotionSearch below
   (compiled under #ifdef FIXED_INTERPRED_MODE) in place of the real search. */
//#define FIXED_INTERPRED_MODE  AVC_P16
/* Reference index and motion vector assigned to every partition by the fixed-mode search. */
#define FIXED_REF_IDX   0
#define FIXED_MVX 0
#define FIXED_MVY 0

/* Sub-macroblock mode used by the fixed-mode search; only relevant when the
   fixed mode is AVC_P8 or AVC_P8ref0. */
#define FIXED_SUBMB_MODE    AVC_4x4
/*************************************************************************/
66
67/* Initialize arrays necessary for motion search */
68AVCEnc_Status InitMotionSearchModule(AVCHandle *avcHandle)
69{
70    AVCEncObject *encvid = (AVCEncObject*) avcHandle->AVCObject;
71    AVCRateControl *rateCtrl = encvid->rateCtrl;
72    int search_range = rateCtrl->mvRange;
73    int number_of_subpel_positions = 4 * (2 * search_range + 3);
74    int max_mv_bits, max_mvd;
75    int temp_bits = 0;
76    uint8 *mvbits;
77    int bits, imax, imin, i;
78    uint8* subpel_pred = (uint8*) encvid->subpel_pred; // all 16 sub-pel positions
79
80
81    while (number_of_subpel_positions > 0)
82    {
83        temp_bits++;
84        number_of_subpel_positions >>= 1;
85    }
86
87    max_mv_bits = 3 + 2 * temp_bits;
88    max_mvd  = (1 << (max_mv_bits >> 1)) - 1;
89
90    encvid->mvbits_array = (uint8*) avcHandle->CBAVC_Malloc(encvid->avcHandle->userData,
91                           sizeof(uint8) * (2 * max_mvd + 1), DEFAULT_ATTR);
92
93    if (encvid->mvbits_array == NULL)
94    {
95        return AVCENC_MEMORY_FAIL;
96    }
97
98    mvbits = encvid->mvbits  = encvid->mvbits_array + max_mvd;
99
100    mvbits[0] = 1;
101    for (bits = 3; bits <= max_mv_bits; bits += 2)
102    {
103        imax = 1    << (bits >> 1);
104        imin = imax >> 1;
105
106        for (i = imin; i < imax; i++)   mvbits[-i] = mvbits[i] = bits;
107    }
108
109    /* initialize half-pel search */
110    encvid->hpel_cand[0] = subpel_pred + REF_CENTER;
111    encvid->hpel_cand[1] = subpel_pred + V2Q_H0Q * SUBPEL_PRED_BLK_SIZE + 1 ;
112    encvid->hpel_cand[2] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE + 1;
113    encvid->hpel_cand[3] = subpel_pred + V0Q_H2Q * SUBPEL_PRED_BLK_SIZE + 25;
114    encvid->hpel_cand[4] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE + 25;
115    encvid->hpel_cand[5] = subpel_pred + V2Q_H0Q * SUBPEL_PRED_BLK_SIZE + 25;
116    encvid->hpel_cand[6] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE + 24;
117    encvid->hpel_cand[7] = subpel_pred + V0Q_H2Q * SUBPEL_PRED_BLK_SIZE + 24;
118    encvid->hpel_cand[8] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE;
119
120    /* For quarter-pel interpolation around best half-pel result */
121
122    encvid->bilin_base[0][0] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE;
123    encvid->bilin_base[0][1] = subpel_pred + V2Q_H0Q * SUBPEL_PRED_BLK_SIZE + 1;
124    encvid->bilin_base[0][2] = subpel_pred + V0Q_H2Q * SUBPEL_PRED_BLK_SIZE + 24;
125    encvid->bilin_base[0][3] = subpel_pred + REF_CENTER;
126
127
128    encvid->bilin_base[1][0] = subpel_pred + V0Q_H2Q * SUBPEL_PRED_BLK_SIZE;
129    encvid->bilin_base[1][1] = subpel_pred + REF_CENTER - 24;
130    encvid->bilin_base[1][2] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE;
131    encvid->bilin_base[1][3] = subpel_pred + V2Q_H0Q * SUBPEL_PRED_BLK_SIZE + 1;
132
133    encvid->bilin_base[2][0] = subpel_pred + REF_CENTER - 24;
134    encvid->bilin_base[2][1] = subpel_pred + V0Q_H2Q * SUBPEL_PRED_BLK_SIZE + 1;
135    encvid->bilin_base[2][2] = subpel_pred + V2Q_H0Q * SUBPEL_PRED_BLK_SIZE + 1;
136    encvid->bilin_base[2][3] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE + 1;
137
138    encvid->bilin_base[3][0] = subpel_pred + V2Q_H0Q * SUBPEL_PRED_BLK_SIZE + 1;
139    encvid->bilin_base[3][1] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE + 1;
140    encvid->bilin_base[3][2] = subpel_pred + REF_CENTER;
141    encvid->bilin_base[3][3] = subpel_pred + V0Q_H2Q * SUBPEL_PRED_BLK_SIZE + 25;
142
143    encvid->bilin_base[4][0] = subpel_pred + REF_CENTER;
144    encvid->bilin_base[4][1] = subpel_pred + V0Q_H2Q * SUBPEL_PRED_BLK_SIZE + 25;
145    encvid->bilin_base[4][2] = subpel_pred + V2Q_H0Q * SUBPEL_PRED_BLK_SIZE + 25;
146    encvid->bilin_base[4][3] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE + 25;
147
148    encvid->bilin_base[5][0] = subpel_pred + V0Q_H2Q * SUBPEL_PRED_BLK_SIZE + 24;
149    encvid->bilin_base[5][1] = subpel_pred + REF_CENTER;
150    encvid->bilin_base[5][2] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE + 24;
151    encvid->bilin_base[5][3] = subpel_pred + V2Q_H0Q * SUBPEL_PRED_BLK_SIZE + 25;
152
153    encvid->bilin_base[6][0] = subpel_pred + REF_CENTER - 1;
154    encvid->bilin_base[6][1] = subpel_pred + V0Q_H2Q * SUBPEL_PRED_BLK_SIZE + 24;
155    encvid->bilin_base[6][2] = subpel_pred + V2Q_H0Q * SUBPEL_PRED_BLK_SIZE + 24;
156    encvid->bilin_base[6][3] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE + 24;
157
158    encvid->bilin_base[7][0] = subpel_pred + V2Q_H0Q * SUBPEL_PRED_BLK_SIZE;
159    encvid->bilin_base[7][1] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE;
160    encvid->bilin_base[7][2] = subpel_pred + REF_CENTER - 1;
161    encvid->bilin_base[7][3] = subpel_pred + V0Q_H2Q * SUBPEL_PRED_BLK_SIZE + 24;
162
163    encvid->bilin_base[8][0] = subpel_pred + REF_CENTER - 25;
164    encvid->bilin_base[8][1] = subpel_pred + V0Q_H2Q * SUBPEL_PRED_BLK_SIZE;
165    encvid->bilin_base[8][2] = subpel_pred + V2Q_H0Q * SUBPEL_PRED_BLK_SIZE;
166    encvid->bilin_base[8][3] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE;
167
168
169    return AVCENC_SUCCESS;
170}
171
172/* Clean-up memory */
173void CleanMotionSearchModule(AVCHandle *avcHandle)
174{
175    AVCEncObject *encvid = (AVCEncObject*) avcHandle->AVCObject;
176
177    if (encvid->mvbits_array)
178    {
179        avcHandle->CBAVC_Free(avcHandle->userData, (int)(encvid->mvbits_array));
180        encvid->mvbits = NULL;
181    }
182
183    return ;
184}
185
186
187bool IntraDecisionABE(int *min_cost, uint8 *cur, int pitch, bool ave)
188{
189    int j;
190    uint8 *out;
191    int temp, SBE;
192    OsclFloat ABE;
193    bool intra = true;
194
195    SBE = 0;
196    /* top neighbor */
197    out = cur - pitch;
198    for (j = 0; j < 16; j++)
199    {
200        temp = out[j] - cur[j];
201        SBE += ((temp >= 0) ? temp : -temp);
202    }
203
204    /* left neighbor */
205    out = cur - 1;
206    out -= pitch;
207    cur -= pitch;
208    for (j = 0; j < 16; j++)
209    {
210        temp = *(out += pitch) - *(cur += pitch);
211        SBE += ((temp >= 0) ? temp : -temp);
212    }
213
214    /* compare mincost/384 and SBE/64 */
215    ABE = SBE / 32.0; //ABE = SBE/64.0; //
216    if (ABE >= *min_cost / 256.0) //if( ABE*0.8 >= min_cost/384.0) //
217    {
218        intra = false; // no possibility of intra, just use inter
219    }
220    else
221    {
222        if (ave == true)
223        {
224            *min_cost = (*min_cost + (int)(SBE * 8)) >> 1; // possibility of intra, averaging the cost
225        }
226        else
227        {
228            *min_cost = (int)(SBE * 8);
229        }
230    }
231
232    return intra;
233}
234
235/******* main function for macroblock prediction for the entire frame ***/
236/* if turns out to be IDR frame, set video->nal_unit_type to AVC_NALTYPE_IDR */
237void AVCMotionEstimation(AVCEncObject *encvid)
238{
239    AVCCommonObj *video = encvid->common;
240    int slice_type = video->slice_type;
241    AVCFrameIO *currInput = encvid->currInput;
242    AVCPictureData *refPic = video->RefPicList0[0];
243    int i, j, k;
244    int mbwidth = video->PicWidthInMbs;
245    int mbheight = video->PicHeightInMbs;
246    int totalMB = video->PicSizeInMbs;
247    int pitch = currInput->pitch;
248    AVCMacroblock *currMB, *mblock = video->mblock;
249    AVCMV *mot_mb_16x16, *mot16x16 = encvid->mot16x16;
250    // AVCMV *mot_mb_16x8, *mot_mb_8x16, *mot_mb_8x8, etc;
251    AVCRateControl *rateCtrl = encvid->rateCtrl;
252    uint8 *intraSearch = encvid->intraSearch;
253    uint FS_en = encvid->fullsearch_enable;
254
255    int NumIntraSearch, start_i, numLoop, incr_i;
256    int mbnum, offset;
257    uint8 *cur, *best_cand[5];
258    int totalSAD = 0;   /* average SAD for rate control */
259    int type_pred;
260    int abe_cost;
261
262#ifdef HTFM
263    /***** HYPOTHESIS TESTING ********/  /* 2/28/01 */
264    int collect = 0;
265    HTFM_Stat htfm_stat;
266    double newvar[16];
267    double exp_lamda[15];
268    /*********************************/
269#endif
270    int hp_guess = 0;
271    uint32 mv_uint32;
272
273    offset = 0;
274
275    if (slice_type == AVC_I_SLICE)
276    {
277        /* cannot do I16 prediction here because it needs full decoding. */
278        for (i = 0; i < totalMB; i++)
279        {
280            encvid->min_cost[i] = 0x7FFFFFFF;  /* max value for int */
281        }
282
283        memset(intraSearch, 1, sizeof(uint8)*totalMB);
284
285        encvid->firstIntraRefreshMBIndx = 0; /* reset this */
286
287        return ;
288    }
289    else   // P_SLICE
290    {
291        for (i = 0; i < totalMB; i++)
292        {
293            mblock[i].mb_intra = 0;
294        }
295        memset(intraSearch, 1, sizeof(uint8)*totalMB);
296    }
297
298    if (refPic->padded == 0)
299    {
300        AVCPaddingEdge(refPic);
301        refPic->padded = 1;
302    }
303    /* Random INTRA update */
304    if (rateCtrl->intraMBRate)
305    {
306        AVCRasterIntraUpdate(encvid, mblock, totalMB, rateCtrl->intraMBRate);
307    }
308
309    encvid->sad_extra_info = NULL;
310#ifdef HTFM
311    /***** HYPOTHESIS TESTING ********/
312    InitHTFM(video, &htfm_stat, newvar, &collect);
313    /*********************************/
314#endif
315
316    if ((rateCtrl->scdEnable == 1)
317            && ((rateCtrl->frame_rate < 5.0) || (video->sliceHdr->frame_num > MIN_GOP)))
318        /* do not try to detect a new scene if low frame rate and too close to previous I-frame */
319    {
320        incr_i = 2;
321        numLoop = 2;
322        start_i = 1;
323        type_pred = 0; /* for initial candidate selection */
324    }
325    else
326    {
327        incr_i = 1;
328        numLoop = 1;
329        start_i = 0;
330        type_pred = 2;
331    }
332
333    /* First pass, loop thru half the macroblock */
334    /* determine scene change */
335    /* Second pass, for the rest of macroblocks */
336    NumIntraSearch = 0; // to be intra searched in the encoding loop.
337    while (numLoop--)
338    {
339        for (j = 0; j < mbheight; j++)
340        {
341            if (incr_i > 1)
342                start_i = (start_i == 0 ? 1 : 0) ; /* toggle 0 and 1 */
343
344            offset = pitch * (j << 4) + (start_i << 4);
345
346            mbnum = j * mbwidth + start_i;
347
348            for (i = start_i; i < mbwidth; i += incr_i)
349            {
350                video->mbNum = mbnum;
351                video->currMB = currMB = mblock + mbnum;
352                mot_mb_16x16 = mot16x16 + mbnum;
353
354                cur = currInput->YCbCr[0] + offset;
355
356                if (currMB->mb_intra == 0) /* for INTER mode */
357                {
358#if defined(HTFM)
359                    HTFMPrepareCurMB_AVC(encvid, &htfm_stat, cur, pitch);
360#else
361                    AVCPrepareCurMB(encvid, cur, pitch);
362#endif
363                    /************************************************************/
364                    /******** full-pel 1MV search **********************/
365
366                    AVCMBMotionSearch(encvid, cur, best_cand, i << 4, j << 4, type_pred,
367                                      FS_en, &hp_guess);
368
369                    abe_cost = encvid->min_cost[mbnum] = mot_mb_16x16->sad;
370
371                    /* set mbMode and MVs */
372                    currMB->mbMode = AVC_P16;
373                    currMB->MBPartPredMode[0][0] = AVC_Pred_L0;
374                    mv_uint32 = ((mot_mb_16x16->y) << 16) | ((mot_mb_16x16->x) & 0xffff);
375                    for (k = 0; k < 32; k += 2)
376                    {
377                        currMB->mvL0[k>>1] = mv_uint32;
378                    }
379
380                    /* make a decision whether it should be tested for intra or not */
381                    if (i != mbwidth - 1 && j != mbheight - 1 && i != 0 && j != 0)
382                    {
383                        if (false == IntraDecisionABE(&abe_cost, cur, pitch, true))
384                        {
385                            intraSearch[mbnum] = 0;
386                        }
387                        else
388                        {
389                            NumIntraSearch++;
390                            rateCtrl->MADofMB[mbnum] = abe_cost;
391                        }
392                    }
393                    else // boundary MBs, always do intra search
394                    {
395                        NumIntraSearch++;
396                    }
397
398                    totalSAD += (int) rateCtrl->MADofMB[mbnum];//mot_mb_16x16->sad;
399                }
400                else    /* INTRA update, use for prediction */
401                {
402                    mot_mb_16x16[0].x = mot_mb_16x16[0].y = 0;
403
404                    /* reset all other MVs to zero */
405                    /* mot_mb_16x8, mot_mb_8x16, mot_mb_8x8, etc. */
406                    abe_cost = encvid->min_cost[mbnum] = 0x7FFFFFFF;  /* max value for int */
407
408                    if (i != mbwidth - 1 && j != mbheight - 1 && i != 0 && j != 0)
409                    {
410                        IntraDecisionABE(&abe_cost, cur, pitch, false);
411
412                        rateCtrl->MADofMB[mbnum] = abe_cost;
413                        totalSAD += abe_cost;
414                    }
415
416                    NumIntraSearch++ ;
417                    /* cannot do I16 prediction here because it needs full decoding. */
418                    // intraSearch[mbnum] = 1;
419
420                }
421
422                mbnum += incr_i;
423                offset += (incr_i << 4);
424
425            } /* for i */
426        } /* for j */
427
428        /* since we cannot do intra/inter decision here, the SCD has to be
429        based on other criteria such as motion vectors coherency or the SAD */
430        if (incr_i > 1 && numLoop) /* scene change on and first loop */
431        {
432            //if(NumIntraSearch > ((totalMB>>3)<<1) + (totalMB>>3)) /* 75% of 50%MBs */
433            if (NumIntraSearch*99 > (48*totalMB)) /* 20% of 50%MBs */
434                /* need to do more investigation about this threshold since the NumIntraSearch
435                only show potential intra MBs, not the actual one */
436            {
437                /* we can choose to just encode I_SLICE without IDR */
438                //video->nal_unit_type = AVC_NALTYPE_IDR;
439                video->nal_unit_type = AVC_NALTYPE_SLICE;
440                video->sliceHdr->slice_type = AVC_I_ALL_SLICE;
441                video->slice_type = AVC_I_SLICE;
442                memset(intraSearch, 1, sizeof(uint8)*totalMB);
443                i = totalMB;
444                while (i--)
445                {
446                    mblock[i].mb_intra = 1;
447                    encvid->min_cost[i] = 0x7FFFFFFF;  /* max value for int */
448                }
449
450                rateCtrl->totalSAD = totalSAD * 2;  /* SAD */
451
452                return ;
453            }
454        }
455        /******** no scene change, continue motion search **********************/
456        start_i = 0;
457        type_pred++; /* second pass */
458    }
459
460    rateCtrl->totalSAD = totalSAD;  /* SAD */
461
462#ifdef HTFM
463    /***** HYPOTHESIS TESTING ********/
464    if (collect)
465    {
466        collect = 0;
467        UpdateHTFM(encvid, newvar, exp_lamda, &htfm_stat);
468    }
469    /*********************************/
470#endif
471
472    return ;
473}
474
475/*=====================================================================
476    Function:   PaddingEdge
477    Date:       09/16/2000
478    Purpose:    Pad edge of a Vop
479=====================================================================*/
480
481void  AVCPaddingEdge(AVCPictureData *refPic)
482{
483    uint8 *src, *dst;
484    int i;
485    int pitch, width, height;
486    uint32 temp1, temp2;
487
488    width = refPic->width;
489    height = refPic->height;
490    pitch = refPic->pitch;
491
492    /* pad top */
493    src = refPic->Sl;
494
495    temp1 = *src; /* top-left corner */
496    temp2 = src[width-1]; /* top-right corner */
497    temp1 |= (temp1 << 8);
498    temp1 |= (temp1 << 16);
499    temp2 |= (temp2 << 8);
500    temp2 |= (temp2 << 16);
501
502    dst = src - (pitch << 4);
503
504    *((uint32*)(dst - 16)) = temp1;
505    *((uint32*)(dst - 12)) = temp1;
506    *((uint32*)(dst - 8)) = temp1;
507    *((uint32*)(dst - 4)) = temp1;
508
509    memcpy(dst, src, width);
510
511    *((uint32*)(dst += width)) = temp2;
512    *((uint32*)(dst + 4)) = temp2;
513    *((uint32*)(dst + 8)) = temp2;
514    *((uint32*)(dst + 12)) = temp2;
515
516    dst = dst - width - 16;
517
518    i = 15;
519    while (i--)
520    {
521        memcpy(dst + pitch, dst, pitch);
522        dst += pitch;
523    }
524
525    /* pad sides */
526    dst += (pitch + 16);
527    src = dst;
528    i = height;
529    while (i--)
530    {
531        temp1 = *src;
532        temp2 = src[width-1];
533        temp1 |= (temp1 << 8);
534        temp1 |= (temp1 << 16);
535        temp2 |= (temp2 << 8);
536        temp2 |= (temp2 << 16);
537
538        *((uint32*)(dst - 16)) = temp1;
539        *((uint32*)(dst - 12)) = temp1;
540        *((uint32*)(dst - 8)) = temp1;
541        *((uint32*)(dst - 4)) = temp1;
542
543        *((uint32*)(dst += width)) = temp2;
544        *((uint32*)(dst + 4)) = temp2;
545        *((uint32*)(dst + 8)) = temp2;
546        *((uint32*)(dst + 12)) = temp2;
547
548        src += pitch;
549        dst = src;
550    }
551
552    /* pad bottom */
553    dst -= 16;
554    i = 16;
555    while (i--)
556    {
557        memcpy(dst, dst - pitch, pitch);
558        dst += pitch;
559    }
560
561
562    return ;
563}
564
565/*===========================================================================
566    Function:   AVCRasterIntraUpdate
567    Date:       2/26/01
568    Purpose:    To raster-scan assign INTRA-update .
569                N macroblocks are updated (also was programmable).
570===========================================================================*/
571void AVCRasterIntraUpdate(AVCEncObject *encvid, AVCMacroblock *mblock, int totalMB, int numRefresh)
572{
573    int indx, i;
574
575    indx = encvid->firstIntraRefreshMBIndx;
576    for (i = 0; i < numRefresh && indx < totalMB; i++)
577    {
578        (mblock + indx)->mb_intra = 1;
579        encvid->intraSearch[indx++] = 1;
580    }
581
582    /* if read the end of frame, reset and loop around */
583    if (indx >= totalMB - 1)
584    {
585        indx = 0;
586        while (i < numRefresh && indx < totalMB)
587        {
588            (mblock + indx)->mb_intra = 1;
589            encvid->intraSearch[indx++] = 1;
590            i++;
591        }
592    }
593
594    encvid->firstIntraRefreshMBIndx = indx; /* update with a new value */
595
596    return ;
597}
598
599
600#ifdef HTFM
601void InitHTFM(VideoEncData *encvid, HTFM_Stat *htfm_stat, double *newvar, int *collect)
602{
603    AVCCommonObj *video = encvid->common;
604    int i;
605    int lx = video->currPic->width; // padding
606    int lx2 = lx << 1;
607    int lx3 = lx2 + lx;
608    int rx = video->currPic->pitch;
609    int rx2 = rx << 1;
610    int rx3 = rx2 + rx;
611
612    int *offset, *offset2;
613
614    /* 4/11/01, collect data every 30 frames, doesn't have to be base layer */
615    if (((int)video->sliceHdr->frame_num) % 30 == 1)
616    {
617
618        *collect = 1;
619
620        htfm_stat->countbreak = 0;
621        htfm_stat->abs_dif_mad_avg = 0;
622
623        for (i = 0; i < 16; i++)
624        {
625            newvar[i] = 0.0;
626        }
627//      encvid->functionPointer->SAD_MB_PADDING = &SAD_MB_PADDING_HTFM_Collect;
628        encvid->functionPointer->SAD_Macroblock = &SAD_MB_HTFM_Collect;
629        encvid->functionPointer->SAD_MB_HalfPel[0] = NULL;
630        encvid->functionPointer->SAD_MB_HalfPel[1] = &SAD_MB_HP_HTFM_Collectxh;
631        encvid->functionPointer->SAD_MB_HalfPel[2] = &SAD_MB_HP_HTFM_Collectyh;
632        encvid->functionPointer->SAD_MB_HalfPel[3] = &SAD_MB_HP_HTFM_Collectxhyh;
633        encvid->sad_extra_info = (void*)(htfm_stat);
634        offset = htfm_stat->offsetArray;
635        offset2 = htfm_stat->offsetRef;
636    }
637    else
638    {
639//      encvid->functionPointer->SAD_MB_PADDING = &SAD_MB_PADDING_HTFM;
640        encvid->functionPointer->SAD_Macroblock = &SAD_MB_HTFM;
641        encvid->functionPointer->SAD_MB_HalfPel[0] = NULL;
642        encvid->functionPointer->SAD_MB_HalfPel[1] = &SAD_MB_HP_HTFMxh;
643        encvid->functionPointer->SAD_MB_HalfPel[2] = &SAD_MB_HP_HTFMyh;
644        encvid->functionPointer->SAD_MB_HalfPel[3] = &SAD_MB_HP_HTFMxhyh;
645        encvid->sad_extra_info = (void*)(encvid->nrmlz_th);
646        offset = encvid->nrmlz_th + 16;
647        offset2 = encvid->nrmlz_th + 32;
648    }
649
650    offset[0] = 0;
651    offset[1] = lx2 + 2;
652    offset[2] = 2;
653    offset[3] = lx2;
654    offset[4] = lx + 1;
655    offset[5] = lx3 + 3;
656    offset[6] = lx + 3;
657    offset[7] = lx3 + 1;
658    offset[8] = lx;
659    offset[9] = lx3 + 2;
660    offset[10] = lx3 ;
661    offset[11] = lx + 2 ;
662    offset[12] = 1;
663    offset[13] = lx2 + 3;
664    offset[14] = lx2 + 1;
665    offset[15] = 3;
666
667    offset2[0] = 0;
668    offset2[1] = rx2 + 2;
669    offset2[2] = 2;
670    offset2[3] = rx2;
671    offset2[4] = rx + 1;
672    offset2[5] = rx3 + 3;
673    offset2[6] = rx + 3;
674    offset2[7] = rx3 + 1;
675    offset2[8] = rx;
676    offset2[9] = rx3 + 2;
677    offset2[10] = rx3 ;
678    offset2[11] = rx + 2 ;
679    offset2[12] = 1;
680    offset2[13] = rx2 + 3;
681    offset2[14] = rx2 + 1;
682    offset2[15] = 3;
683
684    return ;
685}
686
687void UpdateHTFM(AVCEncObject *encvid, double *newvar, double *exp_lamda, HTFM_Stat *htfm_stat)
688{
689    if (htfm_stat->countbreak == 0)
690        htfm_stat->countbreak = 1;
691
692    newvar[0] = (double)(htfm_stat->abs_dif_mad_avg) / (htfm_stat->countbreak * 16.);
693
694    if (newvar[0] < 0.001)
695    {
696        newvar[0] = 0.001; /* to prevent floating overflow */
697    }
698    exp_lamda[0] =  1 / (newvar[0] * 1.4142136);
699    exp_lamda[1] = exp_lamda[0] * 1.5825;
700    exp_lamda[2] = exp_lamda[0] * 2.1750;
701    exp_lamda[3] = exp_lamda[0] * 3.5065;
702    exp_lamda[4] = exp_lamda[0] * 3.1436;
703    exp_lamda[5] = exp_lamda[0] * 3.5315;
704    exp_lamda[6] = exp_lamda[0] * 3.7449;
705    exp_lamda[7] = exp_lamda[0] * 4.5854;
706    exp_lamda[8] = exp_lamda[0] * 4.6191;
707    exp_lamda[9] = exp_lamda[0] * 5.4041;
708    exp_lamda[10] = exp_lamda[0] * 6.5974;
709    exp_lamda[11] = exp_lamda[0] * 10.5341;
710    exp_lamda[12] = exp_lamda[0] * 10.0719;
711    exp_lamda[13] = exp_lamda[0] * 12.0516;
712    exp_lamda[14] = exp_lamda[0] * 15.4552;
713
714    CalcThreshold(HTFM_Pf, exp_lamda, encvid->nrmlz_th);
715    return ;
716}
717
718
719void CalcThreshold(double pf, double exp_lamda[], int nrmlz_th[])
720{
721    int i;
722    double temp[15];
723    //  printf("\nLamda: ");
724
725    /* parametric PREMODELling */
726    for (i = 0; i < 15; i++)
727    {
728        //    printf("%g ",exp_lamda[i]);
729        if (pf < 0.5)
730            temp[i] = 1 / exp_lamda[i] * M4VENC_LOG(2 * pf);
731        else
732            temp[i] = -1 / exp_lamda[i] * M4VENC_LOG(2 * (1 - pf));
733    }
734
735    nrmlz_th[15] = 0;
736    for (i = 0; i < 15; i++)        /* scale upto no.pixels */
737        nrmlz_th[i] = (int)(temp[i] * ((i + 1) << 4) + 0.5);
738
739    return ;
740}
741
742void    HTFMPrepareCurMB_AVC(AVCEncObject *encvid, HTFM_Stat *htfm_stat, uint8 *cur, int pitch)
743{
744    AVCCommonObj *video = encvid->common;
745    uint32 *htfmMB = (uint32*)(encvid->currYMB);
746    uint8 *ptr, byte;
747    int *offset;
748    int i;
749    uint32 word;
750
751    if (((int)video->sliceHdr->frame_num) % 30 == 1)
752    {
753        offset = htfm_stat->offsetArray;
754    }
755    else
756    {
757        offset = encvid->nrmlz_th + 16;
758    }
759
760    for (i = 0; i < 16; i++)
761    {
762        ptr = cur + offset[i];
763        word = ptr[0];
764        byte = ptr[4];
765        word |= (byte << 8);
766        byte = ptr[8];
767        word |= (byte << 16);
768        byte = ptr[12];
769        word |= (byte << 24);
770        *htfmMB++ = word;
771
772        word = *(ptr += (pitch << 2));
773        byte = ptr[4];
774        word |= (byte << 8);
775        byte = ptr[8];
776        word |= (byte << 16);
777        byte = ptr[12];
778        word |= (byte << 24);
779        *htfmMB++ = word;
780
781        word = *(ptr += (pitch << 2));
782        byte = ptr[4];
783        word |= (byte << 8);
784        byte = ptr[8];
785        word |= (byte << 16);
786        byte = ptr[12];
787        word |= (byte << 24);
788        *htfmMB++ = word;
789
790        word = *(ptr += (pitch << 2));
791        byte = ptr[4];
792        word |= (byte << 8);
793        byte = ptr[8];
794        word |= (byte << 16);
795        byte = ptr[12];
796        word |= (byte << 24);
797        *htfmMB++ = word;
798    }
799
800    return ;
801}
802
803
804#endif // HTFM
805
806void    AVCPrepareCurMB(AVCEncObject *encvid, uint8 *cur, int pitch)
807{
808    void* tmp = (void*)(encvid->currYMB);
809    uint32 *currYMB = (uint32*) tmp;
810    int i;
811
812    cur -= pitch;
813
814    for (i = 0; i < 16; i++)
815    {
816        *currYMB++ = *((uint32*)(cur += pitch));
817        *currYMB++ = *((uint32*)(cur + 4));
818        *currYMB++ = *((uint32*)(cur + 8));
819        *currYMB++ = *((uint32*)(cur + 12));
820    }
821
822    return ;
823}
824
825#ifdef FIXED_INTERPRED_MODE
826
827/* due to the complexity of the predicted motion vector, we may not decide to skip
828a macroblock here just yet. */
829/* We will find the best motion vector and the best intra prediction mode for each block. */
830/* output are
831    currMB->NumMbPart,  currMB->MbPartWidth, currMB->MbPartHeight,
832    currMB->NumSubMbPart[], currMB->SubMbPartWidth[], currMB->SubMbPartHeight,
833    currMB->MBPartPredMode[][] (L0 or L1 or BiPred)
834    currMB->RefIdx[], currMB->ref_idx_L0[],
835    currMB->mvL0[], currMB->mvL1[]
836    */
837
838AVCEnc_Status AVCMBMotionSearch(AVCEncObject *encvid, AVCMacroblock *currMB, int mbNum,
839                                int num_pass)
840{
841    AVCCommonObj *video = encvid->common;
842    int mbPartIdx, subMbPartIdx;
843    int16 *mv;
844    int i;
845    int SubMbPartHeight, SubMbPartWidth, NumSubMbPart;
846
847    /* assign value to currMB->MBPartPredMode[][x],subMbMode[],NumSubMbPart[],SubMbPartWidth[],SubMbPartHeight[] */
848
849    currMB->mbMode = FIXED_INTERPRED_MODE;
850    currMB->mb_intra = 0;
851
852    if (currMB->mbMode == AVC_P16)
853    {
854        currMB->NumMbPart = 1;
855        currMB->MbPartWidth = 16;
856        currMB->MbPartHeight = 16;
857        currMB->SubMbPartHeight[0] = 16;
858        currMB->SubMbPartWidth[0] = 16;
859        currMB->NumSubMbPart[0] =  1;
860    }
861    else if (currMB->mbMode == AVC_P16x8)
862    {
863        currMB->NumMbPart = 2;
864        currMB->MbPartWidth = 16;
865        currMB->MbPartHeight = 8;
866        for (i = 0; i < 2; i++)
867        {
868            currMB->SubMbPartWidth[i] = 16;
869            currMB->SubMbPartHeight[i] = 8;
870            currMB->NumSubMbPart[i] = 1;
871        }
872    }
873    else if (currMB->mbMode == AVC_P8x16)
874    {
875        currMB->NumMbPart = 2;
876        currMB->MbPartWidth = 8;
877        currMB->MbPartHeight = 16;
878        for (i = 0; i < 2; i++)
879        {
880            currMB->SubMbPartWidth[i] = 8;
881            currMB->SubMbPartHeight[i] = 16;
882            currMB->NumSubMbPart[i] = 1;
883        }
884    }
885    else if (currMB->mbMode == AVC_P8 || currMB->mbMode == AVC_P8ref0)
886    {
887        currMB->NumMbPart = 4;
888        currMB->MbPartWidth = 8;
889        currMB->MbPartHeight = 8;
890        if (FIXED_SUBMB_MODE == AVC_8x8)
891        {
892            SubMbPartHeight = 8;
893            SubMbPartWidth = 8;
894            NumSubMbPart = 1;
895        }
896        else if (FIXED_SUBMB_MODE == AVC_8x4)
897        {
898            SubMbPartHeight = 4;
899            SubMbPartWidth = 8;
900            NumSubMbPart = 2;
901        }
902        else if (FIXED_SUBMB_MODE == AVC_4x8)
903        {
904            SubMbPartHeight = 8;
905            SubMbPartWidth = 4;
906            NumSubMbPart = 2;
907        }
908        else if (FIXED_SUBMB_MODE == AVC_4x4)
909        {
910            SubMbPartHeight = 4;
911            SubMbPartWidth = 4;
912            NumSubMbPart = 4;
913        }
914
915        for (i = 0; i < 4; i++)
916        {
917            currMB->subMbMode[i] = FIXED_SUBMB_MODE;
918            currMB->SubMbPartHeight[i] = SubMbPartHeight;
919            currMB->SubMbPartWidth[i] = SubMbPartWidth;
920            currMB->NumSubMbPart[i] = NumSubMbPart;
921        }
922    }
923    else /* it's probably intra mode */
924    {
925        return AVCENC_SUCCESS;
926    }
927
928    for (mbPartIdx = 0; mbPartIdx < 4; mbPartIdx++)
929    {
930        currMB->MBPartPredMode[mbPartIdx][0]  = AVC_Pred_L0;
931        currMB->ref_idx_L0[mbPartIdx] = FIXED_REF_IDX;
932        currMB->RefIdx[mbPartIdx] = video->RefPicList0[FIXED_REF_IDX]->RefIdx;
933
934        for (subMbPartIdx = 0; subMbPartIdx < 4; subMbPartIdx++)
935        {
936            mv = (int16*)(currMB->mvL0 + (mbPartIdx << 2) + subMbPartIdx);
937
938            *mv++ = FIXED_MVX;
939            *mv = FIXED_MVY;
940        }
941    }
942
943    encvid->min_cost = 0;
944
945    return AVCENC_SUCCESS;
946}
947
948#else /* perform the search */
949
950/* This option #1 search is very similar to PV's MPEG4 motion search algorithm.
951  The search is done in hierarchical manner from 16x16 MB down to smaller and smaller
952  partition. At each level, a decision can be made to stop the search if the expected
953  prediction gain is not worth the computation. The decision can also be made at the finest
954  level for more fullsearch-like behavior with the price of heavier computation. */
955void AVCMBMotionSearch(AVCEncObject *encvid, uint8 *cur, uint8 *best_cand[],
956                       int i0, int j0, int type_pred, int FS_en, int *hp_guess)
957{
958    AVCCommonObj *video = encvid->common;
959    AVCPictureData *currPic = video->currPic;
960    AVCSeqParamSet *currSPS = video->currSeqParams;
961    AVCRateControl *rateCtrl = encvid->rateCtrl;
962    AVCMacroblock *currMB = video->currMB;
963    uint8 *ref, *cand, *ncand;
964    void *extra_info = encvid->sad_extra_info;
965    int mbnum = video->mbNum;
966    int width = currPic->width; /* 6/12/01, must be multiple of 16 */
967    int height = currPic->height;
968    AVCMV *mot16x16 = encvid->mot16x16;
969    int (*SAD_Macroblock)(uint8*, uint8*, int, void*) = encvid->functionPointer->SAD_Macroblock;
970
971    int range = rateCtrl->mvRange;
972
973    int lx = currPic->pitch; /*  padding */
974    int i, j, imin, jmin, ilow, ihigh, jlow, jhigh;
975    int d, dmin, dn[9];
976    int k;
977    int mvx[5], mvy[5];
978    int num_can, center_again;
979    int last_loc, new_loc = 0;
980    int step, max_step = range >> 1;
981    int next;
982
983    int cmvx, cmvy; /* estimated predicted MV */
984    int lev_idx;
985    int lambda_motion = encvid->lambda_motion;
986    uint8 *mvbits = encvid->mvbits;
987    int mvshift = 2;
988    int mvcost;
989
990    int min_sad = 65535;
991
992    ref = video->RefPicList0[DEFAULT_REF_IDX]->Sl; /* origin of actual frame */
993
994    /* have to initialize these params, necessary for interprediction part */
995    currMB->NumMbPart = 1;
996    currMB->SubMbPartHeight[0] = 16;
997    currMB->SubMbPartWidth[0] = 16;
998    currMB->NumSubMbPart[0] = 1;
999    currMB->ref_idx_L0[0] = currMB->ref_idx_L0[1] =
1000                                currMB->ref_idx_L0[2] = currMB->ref_idx_L0[3] = DEFAULT_REF_IDX;
1001    currMB->ref_idx_L1[0] = currMB->ref_idx_L1[1] =
1002                                currMB->ref_idx_L1[2] = currMB->ref_idx_L1[3] = DEFAULT_REF_IDX;
1003    currMB->RefIdx[0] = currMB->RefIdx[1] =
1004                            currMB->RefIdx[2] = currMB->RefIdx[3] = video->RefPicList0[DEFAULT_REF_IDX]->RefIdx;
1005
1006    cur = encvid->currYMB; /* use smaller memory space for current MB */
1007
1008    /*  find limit of the search (adjusting search range)*/
1009    lev_idx = mapLev2Idx[currSPS->level_idc];
1010
1011    /* we can make this part dynamic based on previous statistics */
1012    ilow = i0 - range;
1013    if (i0 - ilow > 2047) /* clip to conform with the standard */
1014    {
1015        ilow = i0 - 2047;
1016    }
1017    if (ilow < -13)  // change it from -15 to -13 because of 6-tap filter needs extra 2 lines.
1018    {
1019        ilow = -13;
1020    }
1021
1022    ihigh = i0 + range - 1;
1023    if (ihigh - i0 > 2047) /* clip to conform with the standard */
1024    {
1025        ihigh = i0 + 2047;
1026    }
1027    if (ihigh > width - 3)
1028    {
1029        ihigh = width - 3;  // change from width-1 to width-3 for the same reason as above
1030    }
1031
1032    jlow = j0 - range;
1033    if (j0 - jlow > MaxVmvR[lev_idx] - 1) /* clip to conform with the standard */
1034    {
1035        jlow = j0 - MaxVmvR[lev_idx] + 1;
1036    }
1037    if (jlow < -13)     // same reason as above
1038    {
1039        jlow = -13;
1040    }
1041
1042    jhigh = j0 + range - 1;
1043    if (jhigh - j0 > MaxVmvR[lev_idx] - 1) /* clip to conform with the standard */
1044    {
1045        jhigh = j0 + MaxVmvR[lev_idx] - 1;
1046    }
1047    if (jhigh > height - 3) // same reason as above
1048    {
1049        jhigh = height - 3;
1050    }
1051
1052    /* find initial motion vector & predicted MV*/
1053    AVCCandidateSelection(mvx, mvy, &num_can, i0 >> 4, j0 >> 4, encvid, type_pred, &cmvx, &cmvy);
1054
1055    imin = i0;
1056    jmin = j0; /* needed for fullsearch */
1057    ncand = ref + i0 + j0 * lx;
1058
1059    /* for first row of MB, fullsearch can be used */
1060    if (FS_en)
1061    {
1062        *hp_guess = 0; /* no guess for fast half-pel */
1063
1064        dmin =  AVCFullSearch(encvid, ref, cur, &imin, &jmin, ilow, ihigh, jlow, jhigh, cmvx, cmvy);
1065
1066        ncand = ref + imin + jmin * lx;
1067    }
1068    else
1069    {   /*       fullsearch the top row to only upto (0,3) MB */
1070        /*       upto 30% complexity saving with the same complexity */
1071        if (video->PrevRefFrameNum == 0 && j0 == 0 && i0 <= 64 && type_pred != 1)
1072        {
1073            *hp_guess = 0; /* no guess for fast half-pel */
1074            dmin =  AVCFullSearch(encvid, ref, cur, &imin, &jmin, ilow, ihigh, jlow, jhigh, cmvx, cmvy);
1075            ncand = ref + imin + jmin * lx;
1076        }
1077        else
1078        {
1079            /************** initialize candidate **************************/
1080
1081            dmin = 65535;
1082
1083            /* check if all are equal */
1084            if (num_can == ALL_CAND_EQUAL)
1085            {
1086                i = i0 + mvx[0];
1087                j = j0 + mvy[0];
1088
1089                if (i >= ilow && i <= ihigh && j >= jlow && j <= jhigh)
1090                {
1091                    cand = ref + i + j * lx;
1092
1093                    d = (*SAD_Macroblock)(cand, cur, (dmin << 16) | lx, extra_info);
1094                    mvcost = MV_COST(lambda_motion, mvshift, i - i0, j - j0, cmvx, cmvy);
1095                    d +=  mvcost;
1096
1097                    if (d < dmin)
1098                    {
1099                        dmin = d;
1100                        imin = i;
1101                        jmin = j;
1102                        ncand = cand;
1103                        min_sad = d - mvcost; // for rate control
1104                    }
1105                }
1106            }
1107            else
1108            {
1109                /************** evaluate unique candidates **********************/
1110                for (k = 0; k < num_can; k++)
1111                {
1112                    i = i0 + mvx[k];
1113                    j = j0 + mvy[k];
1114
1115                    if (i >= ilow && i <= ihigh && j >= jlow && j <= jhigh)
1116                    {
1117                        cand = ref + i + j * lx;
1118                        d = (*SAD_Macroblock)(cand, cur, (dmin << 16) | lx, extra_info);
1119                        mvcost = MV_COST(lambda_motion, mvshift, i - i0, j - j0, cmvx, cmvy);
1120                        d +=  mvcost;
1121
1122                        if (d < dmin)
1123                        {
1124                            dmin = d;
1125                            imin = i;
1126                            jmin = j;
1127                            ncand = cand;
1128                            min_sad = d - mvcost; // for rate control
1129                        }
1130                    }
1131                }
1132            }
1133
1134            /******************* local refinement ***************************/
1135            center_again = 0;
1136            last_loc = new_loc = 0;
1137            //          ncand = ref + jmin*lx + imin;  /* center of the search */
1138            step = 0;
1139            dn[0] = dmin;
1140            while (!center_again && step <= max_step)
1141            {
1142
1143                AVCMoveNeighborSAD(dn, last_loc);
1144
1145                center_again = 1;
1146                i = imin;
1147                j = jmin - 1;
1148                cand = ref + i + j * lx;
1149
1150                /*  starting from [0,-1] */
1151                /* spiral check one step at a time*/
1152                for (k = 2; k <= 8; k += 2)
1153                {
1154                    if (!tab_exclude[last_loc][k]) /* exclude last step computation */
1155                    {       /* not already computed */
1156                        if (i >= ilow && i <= ihigh && j >= jlow && j <= jhigh)
1157                        {
1158                            d = (*SAD_Macroblock)(cand, cur, (dmin << 16) | lx, extra_info);
1159                            mvcost = MV_COST(lambda_motion, mvshift, i - i0, j - j0, cmvx, cmvy);
1160                            d += mvcost;
1161
1162                            dn[k] = d; /* keep it for half pel use */
1163
1164                            if (d < dmin)
1165                            {
1166                                ncand = cand;
1167                                dmin = d;
1168                                imin = i;
1169                                jmin = j;
1170                                center_again = 0;
1171                                new_loc = k;
1172                                min_sad = d - mvcost; // for rate control
1173                            }
1174                        }
1175                    }
1176                    if (k == 8)  /* end side search*/
1177                    {
1178                        if (!center_again)
1179                        {
1180                            k = -1; /* start diagonal search */
1181                            cand -= lx;
1182                            j--;
1183                        }
1184                    }
1185                    else
1186                    {
1187                        next = refine_next[k][0];
1188                        i += next;
1189                        cand += next;
1190                        next = refine_next[k][1];
1191                        j += next;
1192                        cand += lx * next;
1193                    }
1194                }
1195                last_loc = new_loc;
1196                step ++;
1197            }
1198            if (!center_again)
1199                AVCMoveNeighborSAD(dn, last_loc);
1200
1201            *hp_guess = AVCFindMin(dn);
1202
1203            encvid->rateCtrl->MADofMB[mbnum] = min_sad / 256.0;
1204        }
1205    }
1206
1207    mot16x16[mbnum].sad = dmin;
1208    mot16x16[mbnum].x = (imin - i0) << 2;
1209    mot16x16[mbnum].y = (jmin - j0) << 2;
1210    best_cand[0] = ncand;
1211
1212    if (rateCtrl->subPelEnable) // always enable half-pel search
1213    {
1214        /* find half-pel resolution motion vector */
1215        min_sad = AVCFindHalfPelMB(encvid, cur, mot16x16 + mbnum, best_cand[0], i0, j0, *hp_guess, cmvx, cmvy);
1216
1217        encvid->rateCtrl->MADofMB[mbnum] = min_sad / 256.0;
1218
1219
1220        if (encvid->best_qpel_pos == -1)
1221        {
1222            ncand = encvid->hpel_cand[encvid->best_hpel_pos];
1223        }
1224        else
1225        {
1226            ncand = encvid->qpel_cand[encvid->best_qpel_pos];
1227        }
1228    }
1229    else
1230    {
1231        encvid->rateCtrl->MADofMB[mbnum] = min_sad / 256.0;
1232    }
1233
1234    /** do motion comp here for now */
1235    ref = currPic->Sl + i0 + j0 * lx;
1236    /* copy from the best result to current Picture */
1237    for (j = 0; j < 16; j++)
1238    {
1239        for (i = 0; i < 16; i++)
1240        {
1241            *ref++ = *ncand++;
1242        }
1243        ref += (lx - 16);
1244        ncand += 8;
1245    }
1246
1247    return ;
1248}
1249
1250#endif
1251
1252/*===============================================================================
1253    Function:   AVCFullSearch
1254    Date:       09/16/2000
1255    Purpose:    Perform full-search motion estimation over the range of search
1256                region in a spiral-outward manner.
1257    Input/Output:   VideoEncData, current Vol, previou Vop, pointer to the left corner of
1258                current VOP, current coord (also output), boundaries.
1259===============================================================================*/
1260int AVCFullSearch(AVCEncObject *encvid, uint8 *prev, uint8 *cur,
1261                  int *imin, int *jmin, int ilow, int ihigh, int jlow, int jhigh,
1262                  int cmvx, int cmvy)
1263{
1264    int range = encvid->rateCtrl->mvRange;
1265    AVCPictureData *currPic = encvid->common->currPic;
1266    uint8 *cand;
1267    int i, j, k, l;
1268    int d, dmin;
1269    int i0 = *imin; /* current position */
1270    int j0 = *jmin;
1271    int (*SAD_Macroblock)(uint8*, uint8*, int, void*) = encvid->functionPointer->SAD_Macroblock;
1272    void *extra_info = encvid->sad_extra_info;
1273    int lx = currPic->pitch; /* with padding */
1274
1275    int offset = i0 + j0 * lx;
1276
1277    int lambda_motion = encvid->lambda_motion;
1278    uint8 *mvbits = encvid->mvbits;
1279    int mvshift = 2;
1280    int mvcost;
1281    int min_sad;
1282
1283    cand = prev + offset;
1284
1285    dmin  = (*SAD_Macroblock)(cand, cur, (65535 << 16) | lx, (void*)extra_info);
1286    mvcost = MV_COST(lambda_motion, mvshift, 0, 0, cmvx, cmvy);
1287    min_sad = dmin;
1288    dmin += mvcost;
1289
1290    /* perform spiral search */
1291    for (k = 1; k <= range; k++)
1292    {
1293
1294        i = i0 - k;
1295        j = j0 - k;
1296
1297        cand = prev + i + j * lx;
1298
1299        for (l = 0; l < 8*k; l++)
1300        {
1301            /* no need for boundary checking again */
1302            if (i >= ilow && i <= ihigh && j >= jlow && j <= jhigh)
1303            {
1304                d = (*SAD_Macroblock)(cand, cur, (dmin << 16) | lx, (void*)extra_info);
1305                mvcost = MV_COST(lambda_motion, mvshift, i - i0, j - j0, cmvx, cmvy);
1306                d +=  mvcost;
1307
1308                if (d < dmin)
1309                {
1310                    dmin = d;
1311                    *imin = i;
1312                    *jmin = j;
1313                    min_sad = d - mvcost;
1314                }
1315            }
1316
1317            if (l < (k << 1))
1318            {
1319                i++;
1320                cand++;
1321            }
1322            else if (l < (k << 2))
1323            {
1324                j++;
1325                cand += lx;
1326            }
1327            else if (l < ((k << 2) + (k << 1)))
1328            {
1329                i--;
1330                cand--;
1331            }
1332            else
1333            {
1334                j--;
1335                cand -= lx;
1336            }
1337        }
1338    }
1339
1340    encvid->rateCtrl->MADofMB[encvid->common->mbNum] = (min_sad / 256.0); // for rate control
1341
1342    return dmin;
1343}
1344
1345/*===============================================================================
1346    Function:   AVCCandidateSelection
1347    Date:       09/16/2000
1348    Purpose:    Fill up the list of candidate using spatio-temporal correlation
1349                among neighboring blocks.
1350    Input/Output:   type_pred = 0: first pass, 1: second pass, or no SCD
1351    Modified:   , 09/23/01, get rid of redundant candidates before passing back.
1352                , 09/11/07, added return for modified predicted MV, this will be
1353                    needed for both fast search and fullsearch.
1354===============================================================================*/
1355
1356void AVCCandidateSelection(int *mvx, int *mvy, int *num_can, int imb, int jmb,
1357                           AVCEncObject *encvid, int type_pred, int *cmvx, int *cmvy)
1358{
1359    AVCCommonObj *video = encvid->common;
1360    AVCMV *mot16x16 = encvid->mot16x16;
1361    AVCMV *pmot;
1362    int mbnum = video->mbNum;
1363    int mbwidth = video->PicWidthInMbs;
1364    int mbheight = video->PicHeightInMbs;
1365    int i, j, same, num1;
1366
1367    /* this part is for predicted MV */
1368    int pmvA_x = 0, pmvA_y = 0, pmvB_x = 0, pmvB_y = 0, pmvC_x = 0, pmvC_y = 0;
1369    int availA = 0, availB = 0, availC = 0;
1370
1371    *num_can = 0;
1372
1373    if (video->PrevRefFrameNum != 0) // previous frame is an IDR frame
1374    {
1375        /* Spatio-Temporal Candidate (five candidates) */
1376        if (type_pred == 0) /* first pass */
1377        {
1378            pmot = &mot16x16[mbnum]; /* same coordinate previous frame */
1379            mvx[(*num_can)] = (pmot->x) >> 2;
1380            mvy[(*num_can)++] = (pmot->y) >> 2;
1381            if (imb >= (mbwidth >> 1) && imb > 0)  /*left neighbor previous frame */
1382            {
1383                pmot = &mot16x16[mbnum-1];
1384                mvx[(*num_can)] = (pmot->x) >> 2;
1385                mvy[(*num_can)++] = (pmot->y) >> 2;
1386            }
1387            else if (imb + 1 < mbwidth)   /*right neighbor previous frame */
1388            {
1389                pmot = &mot16x16[mbnum+1];
1390                mvx[(*num_can)] = (pmot->x) >> 2;
1391                mvy[(*num_can)++] = (pmot->y) >> 2;
1392            }
1393
1394            if (jmb < mbheight - 1)  /*bottom neighbor previous frame */
1395            {
1396                pmot = &mot16x16[mbnum+mbwidth];
1397                mvx[(*num_can)] = (pmot->x) >> 2;
1398                mvy[(*num_can)++] = (pmot->y) >> 2;
1399            }
1400            else if (jmb > 0)   /*upper neighbor previous frame */
1401            {
1402                pmot = &mot16x16[mbnum-mbwidth];
1403                mvx[(*num_can)] = (pmot->x) >> 2;
1404                mvy[(*num_can)++] = (pmot->y) >> 2;
1405            }
1406
1407            if (imb > 0 && jmb > 0)  /* upper-left neighbor current frame*/
1408            {
1409                pmot = &mot16x16[mbnum-mbwidth-1];
1410                mvx[(*num_can)] = (pmot->x) >> 2;
1411                mvy[(*num_can)++] = (pmot->y) >> 2;
1412            }
1413            if (jmb > 0 && imb < mbheight - 1)  /* upper right neighbor current frame*/
1414            {
1415                pmot = &mot16x16[mbnum-mbwidth+1];
1416                mvx[(*num_can)] = (pmot->x) >> 2;
1417                mvy[(*num_can)++] = (pmot->y) >> 2;
1418            }
1419        }
1420        else    /* second pass */
1421            /* original ST1 algorithm */
1422        {
1423            pmot = &mot16x16[mbnum]; /* same coordinate previous frame */
1424            mvx[(*num_can)] = (pmot->x) >> 2;
1425            mvy[(*num_can)++] = (pmot->y) >> 2;
1426
1427            if (imb > 0)  /*left neighbor current frame */
1428            {
1429                pmot = &mot16x16[mbnum-1];
1430                mvx[(*num_can)] = (pmot->x) >> 2;
1431                mvy[(*num_can)++] = (pmot->y) >> 2;
1432            }
1433            if (jmb > 0)  /*upper neighbor current frame */
1434            {
1435                pmot = &mot16x16[mbnum-mbwidth];
1436                mvx[(*num_can)] = (pmot->x) >> 2;
1437                mvy[(*num_can)++] = (pmot->y) >> 2;
1438            }
1439            if (imb < mbwidth - 1)  /*right neighbor previous frame */
1440            {
1441                pmot = &mot16x16[mbnum+1];
1442                mvx[(*num_can)] = (pmot->x) >> 2;
1443                mvy[(*num_can)++] = (pmot->y) >> 2;
1444            }
1445            if (jmb < mbheight - 1)  /*bottom neighbor previous frame */
1446            {
1447                pmot = &mot16x16[mbnum+mbwidth];
1448                mvx[(*num_can)] = (pmot->x) >> 2;
1449                mvy[(*num_can)++] = (pmot->y) >> 2;
1450            }
1451        }
1452
1453        /* get predicted MV */
1454        if (imb > 0)    /* get MV from left (A) neighbor either on current or previous frame */
1455        {
1456            availA = 1;
1457            pmot = &mot16x16[mbnum-1];
1458            pmvA_x = pmot->x;
1459            pmvA_y = pmot->y;
1460        }
1461
1462        if (jmb > 0) /* get MV from top (B) neighbor either on current or previous frame */
1463        {
1464            availB = 1;
1465            pmot = &mot16x16[mbnum-mbwidth];
1466            pmvB_x = pmot->x;
1467            pmvB_y = pmot->y;
1468
1469            availC = 1;
1470
1471            if (imb < mbwidth - 1) /* get MV from top-right (C) neighbor of current frame */
1472            {
1473                pmot = &mot16x16[mbnum-mbwidth+1];
1474            }
1475            else /* get MV from top-left (D) neighbor of current frame */
1476            {
1477                pmot = &mot16x16[mbnum-mbwidth-1];
1478            }
1479            pmvC_x = pmot->x;
1480            pmvC_y = pmot->y;
1481        }
1482
1483    }
1484    else  /* only Spatial Candidate (four candidates)*/
1485    {
1486        if (type_pred == 0) /*first pass*/
1487        {
1488            if (imb > 1)  /* neighbor two blocks away to the left */
1489            {
1490                pmot = &mot16x16[mbnum-2];
1491                mvx[(*num_can)] = (pmot->x) >> 2;
1492                mvy[(*num_can)++] = (pmot->y) >> 2;
1493            }
1494            if (imb > 0 && jmb > 0)  /* upper-left neighbor */
1495            {
1496                pmot = &mot16x16[mbnum-mbwidth-1];
1497                mvx[(*num_can)] = (pmot->x) >> 2;
1498                mvy[(*num_can)++] = (pmot->y) >> 2;
1499            }
1500            if (jmb > 0 && imb < mbheight - 1)  /* upper right neighbor */
1501            {
1502                pmot = &mot16x16[mbnum-mbwidth+1];
1503                mvx[(*num_can)] = (pmot->x) >> 2;
1504                mvy[(*num_can)++] = (pmot->y) >> 2;
1505            }
1506
1507            /* get predicted MV */
1508            if (imb > 1)    /* get MV from 2nd left (A) neighbor either of current frame */
1509            {
1510                availA = 1;
1511                pmot = &mot16x16[mbnum-2];
1512                pmvA_x = pmot->x;
1513                pmvA_y = pmot->y;
1514            }
1515
1516            if (jmb > 0 && imb > 0) /* get MV from top-left (B) neighbor of current frame */
1517            {
1518                availB = 1;
1519                pmot = &mot16x16[mbnum-mbwidth-1];
1520                pmvB_x = pmot->x;
1521                pmvB_y = pmot->y;
1522            }
1523
1524            if (jmb > 0 && imb < mbwidth - 1)
1525            {
1526                availC = 1;
1527                pmot = &mot16x16[mbnum-mbwidth+1];
1528                pmvC_x = pmot->x;
1529                pmvC_y = pmot->y;
1530            }
1531        }
1532//#ifdef SCENE_CHANGE_DETECTION
1533        /* second pass (ST2 algorithm)*/
1534        else
1535        {
1536            if (type_pred == 1) /*  4/7/01 */
1537            {
1538                if (imb > 0)  /*left neighbor current frame */
1539                {
1540                    pmot = &mot16x16[mbnum-1];
1541                    mvx[(*num_can)] = (pmot->x) >> 2;
1542                    mvy[(*num_can)++] = (pmot->y) >> 2;
1543                }
1544                if (jmb > 0)  /*upper neighbor current frame */
1545                {
1546                    pmot = &mot16x16[mbnum-mbwidth];
1547                    mvx[(*num_can)] = (pmot->x) >> 2;
1548                    mvy[(*num_can)++] = (pmot->y) >> 2;
1549                }
1550                if (imb < mbwidth - 1)  /*right neighbor current frame */
1551                {
1552                    pmot = &mot16x16[mbnum+1];
1553                    mvx[(*num_can)] = (pmot->x) >> 2;
1554                    mvy[(*num_can)++] = (pmot->y) >> 2;
1555                }
1556                if (jmb < mbheight - 1)  /*bottom neighbor current frame */
1557                {
1558                    pmot = &mot16x16[mbnum+mbwidth];
1559                    mvx[(*num_can)] = (pmot->x) >> 2;
1560                    mvy[(*num_can)++] = (pmot->y) >> 2;
1561                }
1562            }
1563            //#else
1564            else /* original ST1 algorithm */
1565            {
1566                if (imb > 0)  /*left neighbor current frame */
1567                {
1568                    pmot = &mot16x16[mbnum-1];
1569                    mvx[(*num_can)] = (pmot->x) >> 2;
1570                    mvy[(*num_can)++] = (pmot->y) >> 2;
1571
1572                    if (jmb > 0)  /*upper-left neighbor current frame */
1573                    {
1574                        pmot = &mot16x16[mbnum-mbwidth-1];
1575                        mvx[(*num_can)] = (pmot->x) >> 2;
1576                        mvy[(*num_can)++] = (pmot->y) >> 2;
1577                    }
1578
1579                }
1580                if (jmb > 0)  /*upper neighbor current frame */
1581                {
1582                    pmot = &mot16x16[mbnum-mbwidth];
1583                    mvx[(*num_can)] = (pmot->x) >> 2;
1584                    mvy[(*num_can)++] = (pmot->y) >> 2;
1585
1586                    if (imb < mbheight - 1)  /*upper-right neighbor current frame */
1587                    {
1588                        pmot = &mot16x16[mbnum-mbwidth+1];
1589                        mvx[(*num_can)] = (pmot->x) >> 2;
1590                        mvy[(*num_can)++] = (pmot->y) >> 2;
1591                    }
1592                }
1593            }
1594
1595            /* get predicted MV */
1596            if (imb > 0)    /* get MV from left (A) neighbor either on current or previous frame */
1597            {
1598                availA = 1;
1599                pmot = &mot16x16[mbnum-1];
1600                pmvA_x = pmot->x;
1601                pmvA_y = pmot->y;
1602            }
1603
1604            if (jmb > 0) /* get MV from top (B) neighbor either on current or previous frame */
1605            {
1606                availB = 1;
1607                pmot = &mot16x16[mbnum-mbwidth];
1608                pmvB_x = pmot->x;
1609                pmvB_y = pmot->y;
1610
1611                availC = 1;
1612
1613                if (imb < mbwidth - 1) /* get MV from top-right (C) neighbor of current frame */
1614                {
1615                    pmot = &mot16x16[mbnum-mbwidth+1];
1616                }
1617                else /* get MV from top-left (D) neighbor of current frame */
1618                {
1619                    pmot = &mot16x16[mbnum-mbwidth-1];
1620                }
1621                pmvC_x = pmot->x;
1622                pmvC_y = pmot->y;
1623            }
1624        }
1625//#endif
1626    }
1627
1628    /*  3/23/01, remove redundant candidate (possible k-mean) */
1629    num1 = *num_can;
1630    *num_can = 1;
1631    for (i = 1; i < num1; i++)
1632    {
1633        same = 0;
1634        j = 0;
1635        while (!same && j < *num_can)
1636        {
1637#if (CANDIDATE_DISTANCE==0)
1638            if (mvx[i] == mvx[j] && mvy[i] == mvy[j])
1639#else
1640            // modified k-mean,  3/24/01, shouldn't be greater than 3
1641            if (AVC_ABS(mvx[i] - mvx[j]) + AVC_ABS(mvy[i] - mvy[j]) < CANDIDATE_DISTANCE)
1642#endif
1643                same = 1;
1644            j++;
1645        }
1646        if (!same)
1647        {
1648            mvx[*num_can] = mvx[i];
1649            mvy[*num_can] = mvy[i];
1650            (*num_can)++;
1651        }
1652    }
1653
1654    if (num1 == 5 && *num_can == 1)
1655        *num_can = ALL_CAND_EQUAL; /* all are equal */
1656
1657    /* calculate predicted MV */
1658
1659    if (availA && !(availB || availC))
1660    {
1661        *cmvx = pmvA_x;
1662        *cmvy = pmvA_y;
1663    }
1664    else
1665    {
1666        *cmvx = AVC_MEDIAN(pmvA_x, pmvB_x, pmvC_x);
1667        *cmvy = AVC_MEDIAN(pmvA_y, pmvB_y, pmvC_y);
1668    }
1669
1670    return ;
1671}
1672
1673
/*************************************************************
    Function:   AVCMoveNeighborSAD
    Date:       3/27/01
    Purpose:    Move neighboring SAD around when center has shifted

    dn[0] is the center and dn[1..8] its neighbors, laid out as
            1 2 3
            8 0 4
            7 6 5
    new_loc (0..8) is the old position that becomes the new center.
    Costs of old positions that are still within one step of the
    new center are carried over to their new index; all other
    neighbors are reset to 65536. new_loc == 0 resets every
    neighbor and keeps the center value.
*************************************************************/

void AVCMoveNeighborSAD(int dn[], int new_loc)
{
    /* carry_from[new_loc][p]: old index whose cost lands at new index p,
       or -1 when position p has no known cost after the move */
    static const int carry_from[9][9] =
    {
        {0, -1, -1, -1, -1, -1, -1, -1, -1},
        {1, -1, -1, -1,  2,  0,  8, -1, -1},
        {2, -1, -1, -1,  3,  4,  0,  8,  1},
        {3, -1, -1, -1, -1, -1,  4,  0,  2},
        {4,  2,  3, -1, -1, -1,  5,  6,  0},
        {5,  0,  4, -1, -1, -1, -1, -1,  6},
        {6,  8,  0,  4,  5, -1, -1, -1,  7},
        {7, -1,  8,  0,  6, -1, -1, -1, -1},
        {8, -1,  1,  2,  0,  6,  7, -1, -1}
    };
    const int *row = carry_from[new_loc];
    int old_sad[9];
    int pos;

    /* snapshot first: the remapping reads old values while overwriting dn */
    for (pos = 0; pos < 9; pos++)
    {
        old_sad[pos] = dn[pos];
    }

    for (pos = 0; pos < 9; pos++)
    {
        if (row[pos] < 0)
        {
            dn[pos] = 65536;
        }
        else
        {
            dn[pos] = old_sad[row[pos]];
        }
    }

    return ;
}
1751
/*  3/28/01, find minimal of dn[9]
    Returns the index (1..8) of the smallest neighbor cost in dn[1..8];
    dn[0] (the center) is not considered. On ties the lowest index wins. */

int AVCFindMin(int dn[])
{
    int best_idx = 1;
    int idx = 2;

    while (idx < 9)
    {
        /* strict comparison keeps the earliest index among equal values */
        if (dn[idx] < dn[best_idx])
        {
            best_idx = idx;
        }
        idx++;
    }

    return best_idx;
}
1772
1773
1774
1775