1/* ------------------------------------------------------------------
2 * Copyright (C) 1998-2009 PacketVideo
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
13 * express or implied.
14 * See the License for the specific language governing permissions
15 * and limitations under the License.
16 * -------------------------------------------------------------------
17 */
18#include "mp4def.h"
19#include "mp4enc_lib.h"
20#include "mp4lib_int.h"
21#include "m4venc_oscl.h"
22
//#define PRINT_MV  /* enable to append per-macroblock motion vector traces to a debug text file */

/* ---- scene-change detection ---- */
/* Minimum size of GOP (1/23/01, need to be tested).  MotionEstimation only
   attempts scene-change detection once numVopsInGOP exceeds this value,
   unless the frame rate is low. */
#define MIN_GOP 1

/* ---- candidate selection / search tuning ---- */
/* Distance between candidates for them to be considered distinct.
   Shouldn't be more than 3. */
#define CANDIDATE_DISTANCE  0

/* Where the (0,0) motion vector receives its bias:
     0: before the full-pel search (lowest complexity)
     1: after the full-pel search, before half-pel (highest complexity)
     2: after half-pel (high complexity, better PSNR) */
#define ZERO_MV_PREF    0

/* Value reported as the candidate count when all candidates are equal.
   Any number greater than 5 will work. */
#define ALL_CAND_EQUAL  10

/* Search region for 8x8 MVs around the 16x16 MV. */
#define DEF_8X8_WIN 3

/* ---- INTRA refresh ---- */
/* Instead of random INTRA refresh, do raster scan (2/26/01). */
#define RASTER_REFRESH

#if defined(RASTER_REFRESH)
#define TARGET_REFRESH_PER_REGION 4 /* no. of MBs per frame to be INTRA refreshed */
#else
#define TARGET_REFRESH_PER_REGION 1 /* no. of MBs per region to be INTRA refreshed */
#endif

/* ---- macroblock size ---- */
#define NumPixelMB  256     /* number of pixels used in the SAD calculation */
#define MB_Nb  256

/* ---- mode decision biases (in SAD units) ---- */
#define PREF_NULL_VEC 129   /* bias in favour of the zero vector */
#define PREF_16_VEC 129     /* bias in favour of 1MV versus 4MVs */
#define PREF_INTRA  512     /* bias for INTRA coding */
51
52const static Int tab_exclude[9][9] =  // [last_loc][curr_loc]
53{
54    {0, 0, 0, 0, 0, 0, 0, 0, 0},
55    {0, 0, 0, 0, 1, 1, 1, 0, 0},
56    {0, 0, 0, 0, 1, 1, 1, 1, 1},
57    {0, 0, 0, 0, 0, 0, 1, 1, 1},
58    {0, 1, 1, 0, 0, 0, 1, 1, 1},
59    {0, 1, 1, 0, 0, 0, 0, 0, 1},
60    {0, 1, 1, 1, 1, 0, 0, 0, 1},
61    {0, 0, 1, 1, 1, 0, 0, 0, 0},
62    {0, 0, 1, 1, 1, 1, 1, 0, 0}
63}; //to decide whether to continue or compute
64
65const static Int refine_next[8][2] =    /* [curr_k][increment] */
66{
67    {0, 0}, {2, 0}, {1, 1}, {0, 2}, { -1, 1}, { -2, 0}, { -1, -1}, {0, -2}
68};
69
70#ifdef __cplusplus
71extern "C"
72{
73#endif
74
75    void MBMotionSearch(VideoEncData *video, UChar *cur, UChar *best_cand[],
76    Int i0, Int j0, Int type_pred, Int fullsearch, Int *hp_guess);
77
78    Int  fullsearch(VideoEncData *video, Vol *currVol, UChar *ref, UChar *cur,
79                    Int *imin, Int *jmin, Int ilow, Int ihigh, Int jlow, Int jhigh);
80    Int fullsearchBlk(VideoEncData *video, Vol *currVol, UChar *cent, UChar *cur,
81                      Int *imin, Int *jmin, Int ilow, Int ihigh, Int jlow, Int jhigh, Int range);
82    void CandidateSelection(Int *mvx, Int *mvy, Int *num_can, Int imb, Int jmb,
83                            VideoEncData *video, Int type_pred);
84    void RasterIntraUpdate(UChar *intraArray, UChar *Mode, Int totalMB, Int numRefresh);
85    void ResetIntraUpdate(UChar *intraArray, Int totalMB);
86    void ResetIntraUpdateRegion(UChar *intraArray, Int start_i, Int rwidth,
87                                Int start_j, Int rheight, Int mbwidth, Int mbheight);
88
89    void MoveNeighborSAD(Int dn[], Int new_loc);
90    Int FindMin(Int dn[]);
91    void PrepareCurMB(VideoEncData *video, UChar *cur);
92
93#ifdef __cplusplus
94}
95#endif
96
97/***************************************/
98/*  2/28/01, for HYPOTHESIS TESTING */
99#ifdef HTFM     /* defined in mp4def.h */
100#ifdef __cplusplus
101extern "C"
102{
103#endif
104    void CalcThreshold(double pf, double exp_lamda[], Int nrmlz_th[]);
105    void    HTFMPrepareCurMB(VideoEncData *video, HTFM_Stat *htfm_stat, UChar *cur);
106#ifdef __cplusplus
107}
108#endif
109
110
111#define HTFM_Pf  0.25   /* 3/2/1, probability of false alarm, can be varied from 0 to 0.5 */
112/***************************************/
113#endif
114
#if defined(_SAD_STAT)
/* Global search-statistics counters, compiled only when _SAD_STAT is defined.
   Only num_MB and num_HP_MB are updated by MotionEstimation(); the remaining
   counters are maintained elsewhere. */

/* macroblock level */
ULong num_MB = 0;               /* macroblocks passed to MBMotionSearch */
ULong num_HP_MB = 0;            /* macroblocks passed to FindHalfPelMB */

/* block level */
ULong num_Blk = 0;
ULong num_HP_Blk = 0;

/* candidate / half-pel guess tracking */
ULong num_cand = 0;
ULong num_better_hp = 0;
ULong i_dist_from_guess = 0;
ULong j_dist_from_guess = 0;
ULong num_hp_not_zero = 0;
#endif
126
127
128
129/*==================================================================
130    Function:   MotionEstimation
131    Date:       10/3/2000
132    Purpose:    Go through all macroblock for motion search and
133                determine scene change detection.
134====================================================================*/
135
136void MotionEstimation(VideoEncData *video)
137{
138    UChar use_4mv = video->encParams->MV8x8_Enabled;
139    Vol *currVol = video->vol[video->currLayer];
140    Vop *currVop = video->currVop;
141    VideoEncFrameIO *currFrame = video->input;
142    Int i, j, comp;
143    Int mbwidth = currVol->nMBPerRow;
144    Int mbheight = currVol->nMBPerCol;
145    Int totalMB = currVol->nTotalMB;
146    Int width = currFrame->pitch;
147    UChar *mode_mb, *Mode = video->headerInfo.Mode;
148    MOT *mot_mb, **mot = video->mot;
149    UChar *intraArray = video->intraArray;
150    Int FS_en = video->encParams->FullSearch_Enabled;
151    void (*ComputeMBSum)(UChar *, Int, MOT *) = video->functionPointer->ComputeMBSum;
152    void (*ChooseMode)(UChar*, UChar*, Int, Int) = video->functionPointer->ChooseMode;
153
154    Int numIntra, start_i, numLoop, incr_i;
155    Int mbnum, offset;
156    UChar *cur, *best_cand[5];
157    Int sad8 = 0, sad16 = 0;
158    Int totalSAD = 0;   /* average SAD for rate control */
159    Int skip_halfpel_4mv;
160    Int f_code_p, f_code_n, max_mag = 0, min_mag = 0;
161    Int type_pred;
162    Int xh[5] = {0, 0, 0, 0, 0};
163    Int yh[5] = {0, 0, 0, 0, 0}; /* half-pel */
164    UChar hp_mem4MV[17*17*4];
165
166#ifdef HTFM
167    /***** HYPOTHESIS TESTING ********/  /* 2/28/01 */
168    Int collect = 0;
169    HTFM_Stat htfm_stat;
170    double newvar[16];
171    double exp_lamda[15];
172    /*********************************/
173#endif
174    Int hp_guess = 0;
175#ifdef PRINT_MV
176    FILE *fp_debug;
177#endif
178
179//  FILE *fstat;
180//  static int frame_num = 0;
181
182    offset = 0;
183
184    if (video->currVop->predictionType == I_VOP)
185    {   /* compute the SAV */
186        mbnum = 0;
187        cur = currFrame->yChan;
188
189        for (j = 0; j < mbheight; j++)
190        {
191            for (i = 0; i < mbwidth; i++)
192            {
193                video->mbnum = mbnum;
194                mot_mb = mot[mbnum];
195
196                (*ComputeMBSum)(cur + (i << 4), width, mot_mb);
197
198                totalSAD += mot_mb[0].sad;
199
200                mbnum++;
201            }
202            cur += (width << 4);
203        }
204
205        video->sumMAD = (float)totalSAD / (float)NumPixelMB;
206
207        ResetIntraUpdate(intraArray, totalMB);
208
209        return  ;
210    }
211
212    /* 09/20/05 */
213    if (video->prevBaseVop->padded == 0 && !video->encParams->H263_Enabled)
214    {
215        PaddingEdge(video->prevBaseVop);
216        video->prevBaseVop->padded = 1;
217    }
218
219    /* Random INTRA update */
220    /*  suggest to do it in CodeMB */
221    /*  2/21/2001 */
222    //if(video->encParams->RC_Type == CBR_1 || video->encParams->RC_Type == CBR_2)
223    if (video->currLayer == 0 && video->encParams->Refresh)
224    {
225        RasterIntraUpdate(intraArray, Mode, totalMB, video->encParams->Refresh);
226    }
227
228    video->sad_extra_info = NULL;
229
230#ifdef HTFM
231    /***** HYPOTHESIS TESTING ********/  /* 2/28/01 */
232    InitHTFM(video, &htfm_stat, newvar, &collect);
233    /*********************************/
234#endif
235
236    if ((video->encParams->SceneChange_Det == 1) /*&& video->currLayer==0 */
237            && ((video->encParams->LayerFrameRate[0] < 5.0) || (video->numVopsInGOP > MIN_GOP)))
238        /* do not try to detect a new scene if low frame rate and too close to previous I-frame */
239    {
240        incr_i = 2;
241        numLoop = 2;
242        start_i = 1;
243        type_pred = 0; /* for initial candidate selection */
244    }
245    else
246    {
247        incr_i = 1;
248        numLoop = 1;
249        start_i = 0;
250        type_pred = 2;
251    }
252
253    /* First pass, loop thru half the macroblock */
254    /* determine scene change */
255    /* Second pass, for the rest of macroblocks */
256    numIntra = 0;
257    while (numLoop--)
258    {
259        for (j = 0; j < mbheight; j++)
260        {
261            if (incr_i > 1)
262                start_i = (start_i == 0 ? 1 : 0) ; /* toggle 0 and 1 */
263
264            offset = width * (j << 4) + (start_i << 4);
265
266            mbnum = j * mbwidth + start_i;
267
268            for (i = start_i; i < mbwidth; i += incr_i)
269            {
270                video->mbnum = mbnum;
271                mot_mb = mot[mbnum];
272                mode_mb = Mode + mbnum;
273
274                cur = currFrame->yChan + offset;
275
276
277                if (*mode_mb != MODE_INTRA)
278                {
279#if defined(HTFM)
280                    HTFMPrepareCurMB(video, &htfm_stat, cur);
281#else
282                    PrepareCurMB(video, cur);
283#endif
284                    /************************************************************/
285                    /******** full-pel 1MV and 4MVs search **********************/
286
287#ifdef _SAD_STAT
288                    num_MB++;
289#endif
290                    MBMotionSearch(video, cur, best_cand, i << 4, j << 4, type_pred,
291                                   FS_en, &hp_guess);
292
293#ifdef PRINT_MV
294                    fp_debug = fopen("c:\\bitstream\\mv1_debug.txt", "a");
295                    fprintf(fp_debug, "#%d (%d,%d,%d) : ", mbnum, mot_mb[0].x, mot_mb[0].y, mot_mb[0].sad);
296                    fprintf(fp_debug, "(%d,%d,%d) : (%d,%d,%d) : (%d,%d,%d) : (%d,%d,%d) : ==>\n",
297                            mot_mb[1].x, mot_mb[1].y, mot_mb[1].sad,
298                            mot_mb[2].x, mot_mb[2].y, mot_mb[2].sad,
299                            mot_mb[3].x, mot_mb[3].y, mot_mb[3].sad,
300                            mot_mb[4].x, mot_mb[4].y, mot_mb[4].sad);
301                    fclose(fp_debug);
302#endif
303                    sad16 = mot_mb[0].sad;
304#ifdef NO_INTER4V
305                    sad8 = sad16;
306#else
307                    sad8 = mot_mb[1].sad + mot_mb[2].sad + mot_mb[3].sad + mot_mb[4].sad;
308#endif
309
310                    /* choose between INTRA or INTER */
311                    (*ChooseMode)(mode_mb, cur, width, ((sad8 < sad16) ? sad8 : sad16));
312                }
313                else    /* INTRA update, use for prediction 3/23/01 */
314                {
315                    mot_mb[0].x = mot_mb[0].y = 0;
316                }
317
318                if (*mode_mb == MODE_INTRA)
319                {
320                    numIntra++ ;
321
322                    /* compute SAV for rate control and fast DCT, 11/28/00 */
323                    (*ComputeMBSum)(cur, width, mot_mb);
324
325                    /* leave mot_mb[0] as it is for fast motion search */
326                    /* set the 4 MVs to zeros */
327                    for (comp = 1; comp <= 4; comp++)
328                    {
329                        mot_mb[comp].x = 0;
330                        mot_mb[comp].y = 0;
331                    }
332#ifdef PRINT_MV
333                    fp_debug = fopen("c:\\bitstream\\mv1_debug.txt", "a");
334                    fprintf(fp_debug, "\n");
335                    fclose(fp_debug);
336#endif
337                }
338                else /* *mode_mb = MODE_INTER;*/
339                {
340                    if (video->encParams->HalfPel_Enabled)
341                    {
342#ifdef _SAD_STAT
343                        num_HP_MB++;
344#endif
345                        /* find half-pel resolution motion vector */
346                        FindHalfPelMB(video, cur, mot_mb, best_cand[0],
347                                      i << 4, j << 4, xh, yh, hp_guess);
348#ifdef PRINT_MV
349                        fp_debug = fopen("c:\\bitstream\\mv1_debug.txt", "a");
350                        fprintf(fp_debug, "(%d,%d), %d\n", mot_mb[0].x, mot_mb[0].y, mot_mb[0].sad);
351                        fclose(fp_debug);
352#endif
353                        skip_halfpel_4mv = ((sad16 - mot_mb[0].sad) <= (MB_Nb >> 1) + 1);
354                        sad16 = mot_mb[0].sad;
355
356#ifndef NO_INTER4V
357                        if (use_4mv && !skip_halfpel_4mv)
358                        {
359                            /* Also decide 1MV or 4MV !!!!!!!!*/
360                            sad8 = FindHalfPelBlk(video, cur, mot_mb, sad16,
361                                                  best_cand, mode_mb, i << 4, j << 4, xh, yh, hp_mem4MV);
362
363#ifdef PRINT_MV
364                            fp_debug = fopen("c:\\bitstream\\mv1_debug.txt", "a");
365                            fprintf(fp_debug, " (%d,%d,%d) : (%d,%d,%d) : (%d,%d,%d) : (%d,%d,%d) \n",
366                                    mot_mb[1].x, mot_mb[1].y, mot_mb[1].sad,
367                                    mot_mb[2].x, mot_mb[2].y, mot_mb[2].sad,
368                                    mot_mb[3].x, mot_mb[3].y, mot_mb[3].sad,
369                                    mot_mb[4].x, mot_mb[4].y, mot_mb[4].sad);
370                            fclose(fp_debug);
371#endif
372                        }
373#endif /* NO_INTER4V */
374                    }
375                    else    /* HalfPel_Enabled ==0  */
376                    {
377#ifndef NO_INTER4V
378                        //if(sad16 < sad8-PREF_16_VEC)
379                        if (sad16 - PREF_16_VEC > sad8)
380                        {
381                            *mode_mb = MODE_INTER4V;
382                        }
383#endif
384                    }
385#if (ZERO_MV_PREF==2)   /* use mot_mb[7].sad as d0 computed in MBMotionSearch*/
386                    /******************************************************/
387                    if (mot_mb[7].sad - PREF_NULL_VEC < sad16 && mot_mb[7].sad - PREF_NULL_VEC < sad8)
388                    {
389                        mot_mb[0].sad = mot_mb[7].sad - PREF_NULL_VEC;
390                        mot_mb[0].x = mot_mb[0].y = 0;
391                        *mode_mb = MODE_INTER;
392                    }
393                    /******************************************************/
394#endif
395                    if (*mode_mb == MODE_INTER)
396                    {
397                        if (mot_mb[0].x == 0 && mot_mb[0].y == 0)   /* use zero vector */
398                            mot_mb[0].sad += PREF_NULL_VEC; /* add back the bias */
399
400                        mot_mb[1].sad = mot_mb[2].sad = mot_mb[3].sad = mot_mb[4].sad = (mot_mb[0].sad + 2) >> 2;
401                        mot_mb[1].x = mot_mb[2].x = mot_mb[3].x = mot_mb[4].x = mot_mb[0].x;
402                        mot_mb[1].y = mot_mb[2].y = mot_mb[3].y = mot_mb[4].y = mot_mb[0].y;
403
404                    }
405                }
406
407                /* find maximum magnitude */
408                /* compute average SAD for rate control, 11/28/00 */
409                if (*mode_mb == MODE_INTER)
410                {
411#ifdef PRINT_MV
412                    fp_debug = fopen("c:\\bitstream\\mv1_debug.txt", "a");
413                    fprintf(fp_debug, "%d MODE_INTER\n", mbnum);
414                    fclose(fp_debug);
415#endif
416                    totalSAD += mot_mb[0].sad;
417                    if (mot_mb[0].x > max_mag)
418                        max_mag = mot_mb[0].x;
419                    if (mot_mb[0].y > max_mag)
420                        max_mag = mot_mb[0].y;
421                    if (mot_mb[0].x < min_mag)
422                        min_mag = mot_mb[0].x;
423                    if (mot_mb[0].y < min_mag)
424                        min_mag = mot_mb[0].y;
425                }
426                else if (*mode_mb == MODE_INTER4V)
427                {
428#ifdef PRINT_MV
429                    fp_debug = fopen("c:\\bitstream\\mv1_debug.txt", "a");
430                    fprintf(fp_debug, "%d MODE_INTER4V\n", mbnum);
431                    fclose(fp_debug);
432#endif
433                    totalSAD += sad8;
434                    for (comp = 1; comp <= 4; comp++)
435                    {
436                        if (mot_mb[comp].x > max_mag)
437                            max_mag = mot_mb[comp].x;
438                        if (mot_mb[comp].y > max_mag)
439                            max_mag = mot_mb[comp].y;
440                        if (mot_mb[comp].x < min_mag)
441                            min_mag = mot_mb[comp].x;
442                        if (mot_mb[comp].y < min_mag)
443                            min_mag = mot_mb[comp].y;
444                    }
445                }
446                else    /* MODE_INTRA */
447                {
448#ifdef PRINT_MV
449                    fp_debug = fopen("c:\\bitstream\\mv1_debug.txt", "a");
450                    fprintf(fp_debug, "%d MODE_INTRA\n", mbnum);
451                    fclose(fp_debug);
452#endif
453                    totalSAD += mot_mb[0].sad;
454                }
455                mbnum += incr_i;
456                offset += (incr_i << 4);
457
458            }
459        }
460
461        if (incr_i > 1 && numLoop) /* scene change on and first loop */
462        {
463            //if(numIntra > ((totalMB>>3)<<1) + (totalMB>>3)) /* 75% of 50%MBs */
464            if (numIntra > (0.30*(totalMB / 2.0))) /* 15% of 50%MBs */
465            {
466                /******** scene change detected *******************/
467                currVop->predictionType = I_VOP;
468                M4VENC_MEMSET(Mode, MODE_INTRA, sizeof(UChar)*totalMB); /* set this for MB level coding*/
469                currVop->quantizer = video->encParams->InitQuantIvop[video->currLayer];
470
471                /* compute the SAV for rate control & fast DCT */
472                totalSAD = 0;
473                offset = 0;
474                mbnum = 0;
475                cur = currFrame->yChan;
476
477                for (j = 0; j < mbheight; j++)
478                {
479                    for (i = 0; i < mbwidth; i++)
480                    {
481                        video->mbnum = mbnum;
482                        mot_mb = mot[mbnum];
483
484
485                        (*ComputeMBSum)(cur + (i << 4), width, mot_mb);
486                        totalSAD += mot_mb[0].sad;
487
488                        mbnum++;
489                    }
490                    cur += (width << 4);
491                }
492
493                video->sumMAD = (float)totalSAD / (float)NumPixelMB;
494                ResetIntraUpdate(intraArray, totalMB);
495                /* video->numVopsInGOP=0; 3/13/01 move it to vop.c*/
496
497                return ;
498            }
499        }
500        /******** no scene change, continue motion search **********************/
501        start_i = 0;
502        type_pred++; /* second pass */
503    }
504
505    video->sumMAD = (float)totalSAD / (float)NumPixelMB;    /* avg SAD */
506
507    /* find f_code , 10/27/2000 */
508    f_code_p = 1;
509    while ((max_mag >> (4 + f_code_p)) > 0)
510        f_code_p++;
511
512    f_code_n = 1;
513    min_mag *= -1;
514    while ((min_mag - 1) >> (4 + f_code_n) > 0)
515        f_code_n++;
516
517    currVop->fcodeForward = (f_code_p > f_code_n ? f_code_p : f_code_n);
518
519#ifdef HTFM
520    /***** HYPOTHESIS TESTING ********/  /* 2/28/01 */
521    if (collect)
522    {
523        collect = 0;
524        UpdateHTFM(video, newvar, exp_lamda, &htfm_stat);
525    }
526    /*********************************/
527#endif
528
529    return ;
530}
531
532
533#ifdef HTFM
534void InitHTFM(VideoEncData *video, HTFM_Stat *htfm_stat, double *newvar, Int *collect)
535{
536    Int i;
537    Int lx = video->currVop->width; //  padding
538    Int lx2 = lx << 1;
539    Int lx3 = lx2 + lx;
540    Int rx = video->currVop->pitch;
541    Int rx2 = rx << 1;
542    Int rx3 = rx2 + rx;
543
544    Int *offset, *offset2;
545
546    /* 4/11/01, collect data every 30 frames, doesn't have to be base layer */
547    if (((Int)video->numVopsInGOP) % 30 == 1)
548    {
549
550        *collect = 1;
551
552        htfm_stat->countbreak = 0;
553        htfm_stat->abs_dif_mad_avg = 0;
554
555        for (i = 0; i < 16; i++)
556        {
557            newvar[i] = 0.0;
558        }
559//      video->functionPointer->SAD_MB_PADDING = &SAD_MB_PADDING_HTFM_Collect;
560        video->functionPointer->SAD_Macroblock = &SAD_MB_HTFM_Collect;
561        video->functionPointer->SAD_MB_HalfPel[0] = NULL;
562        video->functionPointer->SAD_MB_HalfPel[1] = &SAD_MB_HP_HTFM_Collectxh;
563        video->functionPointer->SAD_MB_HalfPel[2] = &SAD_MB_HP_HTFM_Collectyh;
564        video->functionPointer->SAD_MB_HalfPel[3] = &SAD_MB_HP_HTFM_Collectxhyh;
565        video->sad_extra_info = (void*)(htfm_stat);
566        offset = htfm_stat->offsetArray;
567        offset2 = htfm_stat->offsetRef;
568    }
569    else
570    {
571//      video->functionPointer->SAD_MB_PADDING = &SAD_MB_PADDING_HTFM;
572        video->functionPointer->SAD_Macroblock = &SAD_MB_HTFM;
573        video->functionPointer->SAD_MB_HalfPel[0] = NULL;
574        video->functionPointer->SAD_MB_HalfPel[1] = &SAD_MB_HP_HTFMxh;
575        video->functionPointer->SAD_MB_HalfPel[2] = &SAD_MB_HP_HTFMyh;
576        video->functionPointer->SAD_MB_HalfPel[3] = &SAD_MB_HP_HTFMxhyh;
577        video->sad_extra_info = (void*)(video->nrmlz_th);
578        offset = video->nrmlz_th + 16;
579        offset2 = video->nrmlz_th + 32;
580    }
581
582    offset[0] = 0;
583    offset[1] = lx2 + 2;
584    offset[2] = 2;
585    offset[3] = lx2;
586    offset[4] = lx + 1;
587    offset[5] = lx3 + 3;
588    offset[6] = lx + 3;
589    offset[7] = lx3 + 1;
590    offset[8] = lx;
591    offset[9] = lx3 + 2;
592    offset[10] = lx3 ;
593    offset[11] = lx + 2 ;
594    offset[12] = 1;
595    offset[13] = lx2 + 3;
596    offset[14] = lx2 + 1;
597    offset[15] = 3;
598
599    offset2[0] = 0;
600    offset2[1] = rx2 + 2;
601    offset2[2] = 2;
602    offset2[3] = rx2;
603    offset2[4] = rx + 1;
604    offset2[5] = rx3 + 3;
605    offset2[6] = rx + 3;
606    offset2[7] = rx3 + 1;
607    offset2[8] = rx;
608    offset2[9] = rx3 + 2;
609    offset2[10] = rx3 ;
610    offset2[11] = rx + 2 ;
611    offset2[12] = 1;
612    offset2[13] = rx2 + 3;
613    offset2[14] = rx2 + 1;
614    offset2[15] = 3;
615
616    return ;
617}
618
619void UpdateHTFM(VideoEncData *video, double *newvar, double *exp_lamda, HTFM_Stat *htfm_stat)
620{
621    if (htfm_stat->countbreak == 0)
622        htfm_stat->countbreak = 1;
623
624    newvar[0] = (double)(htfm_stat->abs_dif_mad_avg) / (htfm_stat->countbreak * 16.);
625
626    if (newvar[0] < 0.001)
627    {
628        newvar[0] = 0.001; /* to prevent floating overflow */
629    }
630    exp_lamda[0] =  1 / (newvar[0] * 1.4142136);
631    exp_lamda[1] = exp_lamda[0] * 1.5825;
632    exp_lamda[2] = exp_lamda[0] * 2.1750;
633    exp_lamda[3] = exp_lamda[0] * 3.5065;
634    exp_lamda[4] = exp_lamda[0] * 3.1436;
635    exp_lamda[5] = exp_lamda[0] * 3.5315;
636    exp_lamda[6] = exp_lamda[0] * 3.7449;
637    exp_lamda[7] = exp_lamda[0] * 4.5854;
638    exp_lamda[8] = exp_lamda[0] * 4.6191;
639    exp_lamda[9] = exp_lamda[0] * 5.4041;
640    exp_lamda[10] = exp_lamda[0] * 6.5974;
641    exp_lamda[11] = exp_lamda[0] * 10.5341;
642    exp_lamda[12] = exp_lamda[0] * 10.0719;
643    exp_lamda[13] = exp_lamda[0] * 12.0516;
644    exp_lamda[14] = exp_lamda[0] * 15.4552;
645
646    CalcThreshold(HTFM_Pf, exp_lamda, video->nrmlz_th);
647    return ;
648}
649
650
651void CalcThreshold(double pf, double exp_lamda[], Int nrmlz_th[])
652{
653    Int i;
654    double temp[15];
655    //  printf("\nLamda: ");
656
657    /* parametric PREMODELling */
658    for (i = 0; i < 15; i++)
659    {
660        //    printf("%g ",exp_lamda[i]);
661        if (pf < 0.5)
662            temp[i] = 1 / exp_lamda[i] * M4VENC_LOG(2 * pf);
663        else
664            temp[i] = -1 / exp_lamda[i] * M4VENC_LOG(2 * (1 - pf));
665    }
666
667    nrmlz_th[15] = 0;
668    for (i = 0; i < 15; i++)        /* scale upto no.pixels */
669        nrmlz_th[i] = (Int)(temp[i] * ((i + 1) << 4) + 0.5);
670
671    return ;
672}
673
674void    HTFMPrepareCurMB(VideoEncData *video, HTFM_Stat *htfm_stat, UChar *cur)
675{
676    void* tmp = (void*)(video->currYMB);
677    ULong *htfmMB = (ULong*)tmp;
678    UChar *ptr, byte;
679    Int *offset;
680    Int i;
681    ULong word;
682    Int width = video->currVop->width;
683
684    if (((Int)video->numVopsInGOP) % 30 == 1)
685    {
686        offset = htfm_stat->offsetArray;
687    }
688    else
689    {
690        offset = video->nrmlz_th + 16;
691    }
692
693    for (i = 0; i < 16; i++)
694    {
695        ptr = cur + offset[i];
696        word = ptr[0];
697        byte = ptr[4];
698        word |= (byte << 8);
699        byte = ptr[8];
700        word |= (byte << 16);
701        byte = ptr[12];
702        word |= (byte << 24);
703        *htfmMB++ = word;
704
705        word = *(ptr += (width << 2));
706        byte = ptr[4];
707        word |= (byte << 8);
708        byte = ptr[8];
709        word |= (byte << 16);
710        byte = ptr[12];
711        word |= (byte << 24);
712        *htfmMB++ = word;
713
714        word = *(ptr += (width << 2));
715        byte = ptr[4];
716        word |= (byte << 8);
717        byte = ptr[8];
718        word |= (byte << 16);
719        byte = ptr[12];
720        word |= (byte << 24);
721        *htfmMB++ = word;
722
723        word = *(ptr += (width << 2));
724        byte = ptr[4];
725        word |= (byte << 8);
726        byte = ptr[8];
727        word |= (byte << 16);
728        byte = ptr[12];
729        word |= (byte << 24);
730        *htfmMB++ = word;
731    }
732
733    return ;
734}
735
736
737#endif
738
739void    PrepareCurMB(VideoEncData *video, UChar *cur)
740{
741    void* tmp = (void*)(video->currYMB);
742    ULong *currYMB = (ULong*)tmp;
743    Int i;
744    Int width = video->currVop->width;
745
746    cur -= width;
747
748    for (i = 0; i < 16; i++)
749    {
750        *currYMB++ = *((ULong*)(cur += width));
751        *currYMB++ = *((ULong*)(cur + 4));
752        *currYMB++ = *((ULong*)(cur + 8));
753        *currYMB++ = *((ULong*)(cur + 12));
754    }
755
756    return ;
757}
758
759
760/*==================================================================
761    Function:   MBMotionSearch
762    Date:       09/06/2000
763    Purpose:    Perform motion estimation for a macroblock.
764                Find 1MV and 4MVs in half-pels resolutions.
765                Using ST1 algorithm provided by Chalidabhongse and Kuo
766                CSVT March'98.
767
768==================================================================*/
769
770void MBMotionSearch(VideoEncData *video, UChar *cur, UChar *best_cand[],
771                    Int i0, Int j0, Int type_pred, Int FS_en, Int *hp_guess)
772{
773    Vol *currVol = video->vol[video->currLayer];
774    UChar *ref, *cand, *ncand = NULL, *cur8;
775    void *extra_info = video->sad_extra_info;
776    Int mbnum = video->mbnum;
777    Int width = video->currVop->width; /* 6/12/01, must be multiple of 16 */
778    Int height = video->currVop->height;
779    MOT **mot = video->mot;
780    UChar use_4mv = video->encParams->MV8x8_Enabled;
781    UChar h263_mode = video->encParams->H263_Enabled;
782    Int(*SAD_Macroblock)(UChar*, UChar*, Int, void*) = video->functionPointer->SAD_Macroblock;
783    Int(*SAD_Block)(UChar*, UChar*, Int, Int, void*) = video->functionPointer->SAD_Block;
784    VideoEncParams *encParams = video->encParams;
785    Int range = encParams->SearchRange;
786
787    Int lx = video->currVop->pitch; /* padding */
788    Int comp;
789    Int i, j, imin, jmin, ilow, ihigh, jlow, jhigh, iorg, jorg;
790    Int d, dmin, dn[9];
791#if (ZERO_MV_PREF==1)   /* compute (0,0) MV at the end */
792    Int d0;
793#endif
794    Int k;
795    Int mvx[5], mvy[5], imin0, jmin0;
796    Int num_can, center_again;
797    Int last_loc, new_loc = 0;
798    Int step, max_step = range >> 1;
799    Int next;
800
801    ref = video->forwardRefVop->yChan; /* origin of actual frame */
802
803    cur = video->currYMB; /* use smaller memory space for current MB */
804
805    /*  find limit of the search (adjusting search range)*/
806
807    if (!h263_mode)
808    {
809        ilow = i0 - range;
810        if (ilow < -15)
811            ilow = -15;
812        ihigh = i0 + range - 1;
813        if (ihigh > width - 1)
814            ihigh = width - 1;
815        jlow = j0 - range;
816        if (jlow < -15)
817            jlow = -15;
818        jhigh = j0 + range - 1;
819        if (jhigh > height - 1)
820            jhigh = height - 1;
821    }
822    else
823    {
824        ilow = i0 - range;
825        if (ilow < 0)
826            ilow = 0;
827        ihigh = i0 + range - 1;
828        if (ihigh > width - 16)
829            ihigh = width - 16;
830        jlow = j0 - range;
831        if (jlow < 0)
832            jlow = 0;
833        jhigh = j0 + range - 1;
834        if (jhigh > height - 16)
835            jhigh = height - 16;
836    }
837
838    imin = i0;
839    jmin = j0; /* needed for fullsearch */
840    ncand = ref + imin + jmin * lx;
841
842    /* for first row of MB, fullsearch can be used */
843    if (FS_en)
844    {
845        *hp_guess = 0; /* no guess for fast half-pel */
846
847        dmin =  fullsearch(video, currVol, ref, cur, &imin, &jmin, ilow, ihigh, jlow, jhigh);
848
849        ncand = ref + imin + jmin * lx;
850
851        mot[mbnum][0].sad = dmin;
852        mot[mbnum][0].x = (imin - i0) << 1;
853        mot[mbnum][0].y = (jmin - j0) << 1;
854        imin0 = imin << 1;  /* 16x16 MV in half-pel resolution */
855        jmin0 = jmin << 1;
856        best_cand[0] = ncand;
857    }
858    else
859    {   /* 4/7/01, modified this testing for fullsearch the top row to only upto (0,3) MB */
860        /*            upto 30% complexity saving with the same complexity */
861        if (video->forwardRefVop->predictionType == I_VOP && j0 == 0 && i0 <= 64 && type_pred != 1)
862        {
863            *hp_guess = 0; /* no guess for fast half-pel */
864            dmin =  fullsearch(video, currVol, ref, cur, &imin, &jmin, ilow, ihigh, jlow, jhigh);
865            ncand = ref + imin + jmin * lx;
866        }
867        else
868        {
869            /************** initialize candidate **************************/
870            /* find initial motion vector */
871            CandidateSelection(mvx, mvy, &num_can, i0 >> 4, j0 >> 4, video, type_pred);
872
873            dmin = 65535;
874
875            /* check if all are equal */
876            if (num_can == ALL_CAND_EQUAL)
877            {
878                i = i0 + mvx[0];
879                j = j0 + mvy[0];
880
881                if (i >= ilow && i <= ihigh && j >= jlow && j <= jhigh)
882                {
883                    cand = ref + i + j * lx;
884
885                    d = (*SAD_Macroblock)(cand, cur, (dmin << 16) | lx, extra_info);
886
887                    if (d < dmin)
888                    {
889                        dmin = d;
890                        imin = i;
891                        jmin = j;
892                        ncand = cand;
893                    }
894                }
895            }
896            else
897            {
898                /************** evaluate unique candidates **********************/
899                for (k = 0; k < num_can; k++)
900                {
901                    i = i0 + mvx[k];
902                    j = j0 + mvy[k];
903
904                    if (i >= ilow && i <= ihigh && j >= jlow && j <= jhigh)
905                    {
906                        cand = ref + i + j * lx;
907                        d = (*SAD_Macroblock)(cand, cur, (dmin << 16) | lx, extra_info);
908
909                        if (d < dmin)
910                        {
911                            dmin = d;
912                            imin = i;
913                            jmin = j;
914                            ncand = cand;
915                        }
916                        else if ((d == dmin) && PV_ABS(mvx[k]) + PV_ABS(mvy[k]) < PV_ABS(i0 - imin) + PV_ABS(j0 - jmin))
917                        {
918                            dmin = d;
919                            imin = i;
920                            jmin = j;
921                            ncand = cand;
922                        }
923                    }
924                }
925            }
926            if (num_can == 0 || dmin == 65535) /* no candidate selected */
927            {
928                ncand = ref + i0 + j0 * lx; /* use (0,0) MV as initial value */
929                mot[mbnum][7].sad = dmin = (*SAD_Macroblock)(ncand, cur, (65535 << 16) | lx, extra_info);
930#if (ZERO_MV_PREF==1)   /* compute (0,0) MV at the end */
931                d0 = dmin;
932#endif
933                imin = i0;
934                jmin = j0;
935            }
936
937#if (ZERO_MV_PREF==0)  /*  COMPUTE ZERO VECTOR FIRST !!!!!*/
938            dmin -= PREF_NULL_VEC;
939#endif
940
941            /******************* local refinement ***************************/
942            center_again = 0;
943            last_loc = new_loc = 0;
944            //          ncand = ref + jmin*lx + imin;  /* center of the search */
945            step = 0;
946            dn[0] = dmin;
947            while (!center_again && step <= max_step)
948            {
949
950                MoveNeighborSAD(dn, last_loc);
951
952                center_again = 1;
953                i = imin;
954                j = jmin - 1;
955                cand = ref + i + j * lx;
956
957                /*  starting from [0,-1] */
958                /* spiral check one step at a time*/
959                for (k = 2; k <= 8; k += 2)
960                {
961                    if (!tab_exclude[last_loc][k]) /* exclude last step computation */
962                    {       /* not already computed */
963                        if (i >= ilow && i <= ihigh && j >= jlow && j <= jhigh)
964                        {
965                            d = (*SAD_Macroblock)(cand, cur, (dmin << 16) | lx, extra_info);
966                            dn[k] = d; /* keep it for half pel use */
967
968                            if (d < dmin)
969                            {
970                                ncand = cand;
971                                dmin = d;
972                                imin = i;
973                                jmin = j;
974                                center_again = 0;
975                                new_loc = k;
976                            }
977                            else if ((d == dmin) && PV_ABS(i0 - i) + PV_ABS(j0 - j) < PV_ABS(i0 - imin) + PV_ABS(j0 - jmin))
978                            {
979                                ncand = cand;
980                                imin = i;
981                                jmin = j;
982                                center_again = 0;
983                                new_loc = k;
984                            }
985                        }
986                    }
987                    if (k == 8)  /* end side search*/
988                    {
989                        if (!center_again)
990                        {
991                            k = -1; /* start diagonal search */
992                            cand -= lx;
993                            j--;
994                        }
995                    }
996                    else
997                    {
998                        next = refine_next[k][0];
999                        i += next;
1000                        cand += next;
1001                        next = refine_next[k][1];
1002                        j += next;
1003                        cand += lx * next;
1004                    }
1005                }
1006                last_loc = new_loc;
1007                step ++;
1008            }
1009            if (!center_again)
1010                MoveNeighborSAD(dn, last_loc);
1011
1012            *hp_guess = FindMin(dn);
1013
1014        }
1015
1016#if (ZERO_MV_PREF==1)   /* compute (0,0) MV at the end */
1017        if (d0 - PREF_NULL_VEC < dmin)
1018        {
1019            ncand = ref + i0 + j0 * lx;
1020            dmin = d0;
1021            imin = i0;
1022            jmin = j0;
1023        }
1024#endif
1025        mot[mbnum][0].sad = dmin;
1026        mot[mbnum][0].x = (imin - i0) << 1;
1027        mot[mbnum][0].y = (jmin - j0) << 1;
1028        imin0 = imin << 1;  /* 16x16 MV in half-pel resolution */
1029        jmin0 = jmin << 1;
1030        best_cand[0] = ncand;
1031    }
1032    /* imin and jmin is the best 1 MV */
1033#ifndef NO_INTER4V
1034    /*******************  Find 4 motion vectors ****************************/
1035    if (use_4mv && !h263_mode)
1036    {
1037#ifdef _SAD_STAT
1038        num_Blk += 4;
1039#endif
1040        /* starting from the best 1MV */
1041        //offset = imin + jmin*lx;
1042        iorg = i0;
1043        jorg = j0;
1044
1045        for (comp = 0; comp < 4; comp++)
1046        {
1047            i0 = iorg + ((comp & 1) << 3);
1048            j0 = jorg + ((comp & 2) << 2);
1049
1050            imin = (imin0 >> 1) + ((comp & 1) << 3);    /* starting point from 16x16 MV */
1051            jmin = (jmin0 >> 1) + ((comp & 2) << 2);
1052            ncand = ref + imin + jmin * lx;
1053
1054            cur8 = cur + ((comp & 1) << 3) + (((comp & 2) << 2) << 4) ; /* 11/30/05, smaller cache */
1055
1056            /*  find limit of the search (adjusting search range)*/
1057            ilow = i0 - range;
1058            ihigh = i0 + range - 1 ;/* 4/9/01 */
1059            if (ilow < -15)
1060                ilow = -15;
1061            if (ihigh > width - 1)
1062                ihigh = width - 1;
1063            jlow = j0 - range;
1064            jhigh = j0 + range - 1 ;/* 4/9/01 */
1065            if (jlow < -15)
1066                jlow = -15;
1067            if (jhigh > height - 1)
1068                jhigh = height - 1;
1069
1070            SAD_Block = video->functionPointer->SAD_Block;
1071
1072            if (FS_en)  /* fullsearch enable, center around 16x16 MV */
1073            {
1074                dmin =  fullsearchBlk(video, currVol, ncand, cur8, &imin, &jmin, ilow, ihigh, jlow, jhigh, range);
1075                ncand = ref + imin + jmin * lx;
1076
1077                mot[mbnum][comp+1].sad = dmin;
1078                mot[mbnum][comp+1].x = (imin - i0) << 1;
1079                mot[mbnum][comp+1].y = (jmin - j0) << 1;
1080                best_cand[comp+1] = ncand;
1081            }
1082            else    /* no fullsearch, do local search */
1083            {
1084                /* starting point from 16x16 */
1085                dmin = (*SAD_Block)(ncand, cur8, 65536, lx, extra_info);
1086
1087                /******************* local refinement ***************************/
1088                center_again = 0;
1089                last_loc = 0;
1090
1091                while (!center_again)
1092                {
1093                    center_again = 1;
1094                    i = imin;
1095                    j = jmin - 1;
1096                    cand = ref + i + j * lx;
1097
1098                    /*  starting from [0,-1] */
1099                    /* spiral check one step at a time*/
1100                    for (k = 2; k <= 8; k += 2)
1101                    {
1102                        if (!tab_exclude[last_loc][k]) /* exclude last step computation */
1103                        {       /* not already computed */
1104                            if (i >= ilow && i <= ihigh && j >= jlow && j <= jhigh)
1105                            {
1106                                d = (*SAD_Block)(cand, cur8, dmin, lx, extra_info);
1107
1108                                if (d < dmin)
1109                                {
1110                                    ncand = cand;
1111                                    dmin = d;
1112                                    imin = i;
1113                                    jmin = j;
1114                                    center_again = 0;
1115                                    new_loc = k;
1116                                }
1117                                else if ((d == dmin) &&
1118                                         PV_ABS(i0 - i) + PV_ABS(j0 - j) < PV_ABS(i0 - imin) + PV_ABS(j0 - jmin))
1119                                {
1120                                    ncand = cand;
1121                                    imin = i;
1122                                    jmin = j;
1123                                    center_again = 0;
1124                                    new_loc = k;
1125                                }
1126                            }
1127                        }
1128                        if (k == 8)  /* end side search*/
1129                        {
1130                            if (!center_again)
1131                            {
1132                                k = -1; /* start diagonal search */
1133                                if (j <= height - 1 && j > 0)   cand -= lx;
1134                                j--;
1135                            }
1136                        }
1137                        else
1138                        {
1139                            next = refine_next[k][0];
1140                            cand += next;
1141                            i += next;
1142                            next = refine_next[k][1];
1143                            cand += lx * next;
1144                            j += next;
1145                        }
1146                    }
1147                    last_loc = new_loc;
1148                }
1149                mot[mbnum][comp+1].sad = dmin;
1150                mot[mbnum][comp+1].x = (imin - i0) << 1;
1151                mot[mbnum][comp+1].y = (jmin - j0) << 1;
1152                best_cand[comp+1] = ncand;
1153            }
1154            /********************************************/
1155        }
1156    }
1157    else
1158#endif  /* NO_INTER4V */
1159    {
1160        mot[mbnum][1].sad = mot[mbnum][2].sad = mot[mbnum][3].sad = mot[mbnum][4].sad = (dmin + 2) >> 2;
1161        mot[mbnum][1].x = mot[mbnum][2].x = mot[mbnum][3].x = mot[mbnum][4].x = mot[mbnum][0].x;
1162        mot[mbnum][1].y = mot[mbnum][2].y = mot[mbnum][3].y = mot[mbnum][4].y = mot[mbnum][0].y;
1163        best_cand[1] = best_cand[2] = best_cand[3] = best_cand[4] = ncand;
1164
1165    }
1166    return ;
1167}
1168
1169
1170/*===============================================================================
1171    Function:   fullsearch
1172    Date:       09/16/2000
1173    Purpose:    Perform full-search motion estimation over the range of search
1174                region in a spiral-outward manner.
1175    Input/Output:   VideoEncData, current Vol, previou Vop, pointer to the left corner of
1176                current VOP, current coord (also output), boundaries.
1177===============================================================================*/
1178
1179Int fullsearch(VideoEncData *video, Vol *currVol, UChar *prev, UChar *cur,
1180               Int *imin, Int *jmin, Int ilow, Int ihigh, Int jlow, Int jhigh)
1181{
1182    Int range = video->encParams->SearchRange;
1183    UChar *cand;
1184    Int i, j, k, l;
1185    Int d, dmin;
1186    Int i0 = *imin; /* current position */
1187    Int j0 = *jmin;
1188    Int(*SAD_Macroblock)(UChar*, UChar*, Int, void*) = video->functionPointer->SAD_Macroblock;
1189    void *extra_info = video->sad_extra_info;
1190//  UChar h263_mode = video->encParams->H263_Enabled;
1191    Int lx = video->currVop->pitch; /* with padding */
1192
1193    Int offset = i0 + j0 * lx;
1194
1195    OSCL_UNUSED_ARG(currVol);
1196
1197    cand = prev + offset;
1198
1199    dmin  = (*SAD_Macroblock)(cand, cur, (65535 << 16) | lx, (void*)extra_info) - PREF_NULL_VEC;
1200
1201    /* perform spiral search */
1202    for (k = 1; k <= range; k++)
1203    {
1204
1205        i = i0 - k;
1206        j = j0 - k;
1207
1208        cand = prev + i + j * lx;
1209
1210        for (l = 0; l < 8*k; l++)
1211        {
1212            /* no need for boundary checking again */
1213            if (i >= ilow && i <= ihigh && j >= jlow && j <= jhigh)
1214            {
1215                d = (*SAD_Macroblock)(cand, cur, (dmin << 16) | lx, (void*)extra_info);
1216
1217                if (d < dmin)
1218                {
1219                    dmin = d;
1220                    *imin = i;
1221                    *jmin = j;
1222                }
1223                else if ((d == dmin) && PV_ABS(i0 - i) + PV_ABS(j0 - j) < PV_ABS(i0 - *imin) + PV_ABS(j0 - *jmin))
1224                {
1225                    dmin = d;
1226                    *imin = i;
1227                    *jmin = j;
1228                }
1229            }
1230
1231            if (l < (k << 1))
1232            {
1233                i++;
1234                cand++;
1235            }
1236            else if (l < (k << 2))
1237            {
1238                j++;
1239                cand += lx;
1240            }
1241            else if (l < ((k << 2) + (k << 1)))
1242            {
1243                i--;
1244                cand--;
1245            }
1246            else
1247            {
1248                j--;
1249                cand -= lx;
1250            }
1251        }
1252    }
1253
1254    return dmin;
1255}
1256
1257#ifndef NO_INTER4V
1258/*===============================================================================
1259    Function:   fullsearchBlk
1260    Date:       01/9/2001
1261    Purpose:    Perform full-search motion estimation of an 8x8 block over the range
1262                of search region in a spiral-outward manner centered at the 16x16 MV.
1263    Input/Output:   VideoEncData, MB coordinate, pointer to the initial MV on the
1264                reference, pointer to coor of current block, search range.
1265===============================================================================*/
1266Int fullsearchBlk(VideoEncData *video, Vol *currVol, UChar *cent, UChar *cur,
1267                  Int *imin, Int *jmin, Int ilow, Int ihigh, Int jlow, Int jhigh, Int range)
1268{
1269    UChar *cand, *ref;
1270    Int i, j, k, l, istart, jstart;
1271    Int d, dmin;
1272    Int lx = video->currVop->pitch; /* with padding */
1273    Int(*SAD_Block)(UChar*, UChar*, Int, Int, void*) = video->functionPointer->SAD_Block;
1274    void *extra_info = video->sad_extra_info;
1275
1276    OSCL_UNUSED_ARG(currVol);
1277
1278    /* starting point centered at 16x16 MV */
1279    ref = cent;
1280    istart = *imin;
1281    jstart = *jmin;
1282
1283    dmin = (*SAD_Block)(ref, cur, 65536, lx, (void*)extra_info);
1284
1285    cand = ref;
1286    /* perform spiral search */
1287    for (k = 1; k <= range; k++)
1288    {
1289
1290        i = istart - k;
1291        j = jstart - k;
1292        cand -= (lx + 1);  /* candidate region */
1293
1294        for (l = 0; l < 8*k; l++)
1295        {
1296            /* no need for boundary checking again */
1297            if (i >= ilow && i <= ihigh && j >= jlow && j <= jhigh)
1298            {
1299                d = (*SAD_Block)(cand, cur, dmin, lx, (void*)extra_info);
1300
1301                if (d < dmin)
1302                {
1303                    dmin = d;
1304                    *imin = i;
1305                    *jmin = j;
1306                }
1307                else if ((d == dmin) &&
1308                         PV_ABS(istart - i) + PV_ABS(jstart - j) < PV_ABS(istart - *imin) + PV_ABS(jstart - *jmin))
1309                {
1310                    dmin = d;
1311                    *imin = i;
1312                    *jmin = j;
1313                }
1314            }
1315
1316            if (l < (k << 1))
1317            {
1318                i++;
1319                cand++;
1320            }
1321            else if (l < (k << 2))
1322            {
1323                j++;
1324                cand += lx;
1325            }
1326            else if (l < ((k << 2) + (k << 1)))
1327            {
1328                i--;
1329                cand--;
1330            }
1331            else
1332            {
1333                j--;
1334                cand -= lx;
1335            }
1336        }
1337    }
1338
1339    return dmin;
1340}
1341#endif /* NO_INTER4V */
1342
1343/*===============================================================================
1344    Function:   CandidateSelection
1345    Date:       09/16/2000
1346    Purpose:    Fill up the list of candidate using spatio-temporal correlation
1347                among neighboring blocks.
1348    Input/Output:   type_pred = 0: first pass, 1: second pass, or no SCD
1349    Modified:    09/23/01, get rid of redundant candidates before passing back.
1350===============================================================================*/
1351
1352void CandidateSelection(Int *mvx, Int *mvy, Int *num_can, Int imb, Int jmb,
1353                        VideoEncData *video, Int type_pred)
1354{
1355    MOT **mot = video->mot;
1356    MOT *pmot;
1357    Int mbnum = video->mbnum;
1358    Vol *currVol = video->vol[video->currLayer];
1359    Int mbwidth = currVol->nMBPerRow;
1360    Int mbheight = currVol->nMBPerCol;
1361    Int i, j, same, num1;
1362
1363    *num_can = 0;
1364
1365    if (video->forwardRefVop->predictionType == P_VOP)
1366    {
1367        /* Spatio-Temporal Candidate (five candidates) */
1368        if (type_pred == 0) /* first pass */
1369        {
1370            pmot = &mot[mbnum][0]; /* same coordinate previous frame */
1371            mvx[(*num_can)] = (pmot->x) >> 1;
1372            mvy[(*num_can)++] = (pmot->y) >> 1;
1373            if (imb >= (mbwidth >> 1) && imb > 0)  /*left neighbor previous frame */
1374            {
1375                pmot = &mot[mbnum-1][0];
1376                mvx[(*num_can)] = (pmot->x) >> 1;
1377                mvy[(*num_can)++] = (pmot->y) >> 1;
1378            }
1379            else if (imb + 1 < mbwidth)   /*right neighbor previous frame */
1380            {
1381                pmot = &mot[mbnum+1][0];
1382                mvx[(*num_can)] = (pmot->x) >> 1;
1383                mvy[(*num_can)++] = (pmot->y) >> 1;
1384            }
1385
1386            if (jmb < mbheight - 1)  /*bottom neighbor previous frame */
1387            {
1388                pmot = &mot[mbnum+mbwidth][0];
1389                mvx[(*num_can)] = (pmot->x) >> 1;
1390                mvy[(*num_can)++] = (pmot->y) >> 1;
1391            }
1392            else if (jmb > 0)   /*upper neighbor previous frame */
1393            {
1394                pmot = &mot[mbnum-mbwidth][0];
1395                mvx[(*num_can)] = (pmot->x) >> 1;
1396                mvy[(*num_can)++] = (pmot->y) >> 1;
1397            }
1398
1399            if (imb > 0 && jmb > 0)  /* upper-left neighbor current frame*/
1400            {
1401                pmot = &mot[mbnum-mbwidth-1][0];
1402                mvx[(*num_can)] = (pmot->x) >> 1;
1403                mvy[(*num_can)++] = (pmot->y) >> 1;
1404            }
1405            if (jmb > 0 && imb < mbheight - 1)  /* upper right neighbor current frame*/
1406            {
1407                pmot = &mot[mbnum-mbwidth+1][0];
1408                mvx[(*num_can)] = (pmot->x) >> 1;
1409                mvy[(*num_can)++] = (pmot->y) >> 1;
1410            }
1411        }
1412        else    /* second pass */
1413            /* original ST1 algorithm */
1414        {
1415            pmot = &mot[mbnum][0]; /* same coordinate previous frame */
1416            mvx[(*num_can)] = (pmot->x) >> 1;
1417            mvy[(*num_can)++] = (pmot->y) >> 1;
1418
1419            if (imb > 0)  /*left neighbor current frame */
1420            {
1421                pmot = &mot[mbnum-1][0];
1422                mvx[(*num_can)] = (pmot->x) >> 1;
1423                mvy[(*num_can)++] = (pmot->y) >> 1;
1424            }
1425            if (jmb > 0)  /*upper neighbor current frame */
1426            {
1427                pmot = &mot[mbnum-mbwidth][0];
1428                mvx[(*num_can)] = (pmot->x) >> 1;
1429                mvy[(*num_can)++] = (pmot->y) >> 1;
1430            }
1431            if (imb < mbwidth - 1)  /*right neighbor previous frame */
1432            {
1433                pmot = &mot[mbnum+1][0];
1434                mvx[(*num_can)] = (pmot->x) >> 1;
1435                mvy[(*num_can)++] = (pmot->y) >> 1;
1436            }
1437            if (jmb < mbheight - 1)  /*bottom neighbor previous frame */
1438            {
1439                pmot = &mot[mbnum+mbwidth][0];
1440                mvx[(*num_can)] = (pmot->x) >> 1;
1441                mvy[(*num_can)++] = (pmot->y) >> 1;
1442            }
1443        }
1444    }
1445    else  /* only Spatial Candidate (four candidates)*/
1446    {
1447        if (type_pred == 0) /*first pass*/
1448        {
1449            if (imb > 1)  /* neighbor two blocks away to the left */
1450            {
1451                pmot = &mot[mbnum-2][0];
1452                mvx[(*num_can)] = (pmot->x) >> 1;
1453                mvy[(*num_can)++] = (pmot->y) >> 1;
1454            }
1455            if (imb > 0 && jmb > 0)  /* upper-left neighbor */
1456            {
1457                pmot = &mot[mbnum-mbwidth-1][0];
1458                mvx[(*num_can)] = (pmot->x) >> 1;
1459                mvy[(*num_can)++] = (pmot->y) >> 1;
1460            }
1461            if (jmb > 0 && imb < mbheight - 1)  /* upper right neighbor */
1462            {
1463                pmot = &mot[mbnum-mbwidth+1][0];
1464                mvx[(*num_can)] = (pmot->x) >> 1;
1465                mvy[(*num_can)++] = (pmot->y) >> 1;
1466            }
1467        }
1468//#ifdef SCENE_CHANGE_DETECTION
1469        /* second pass (ST2 algorithm)*/
1470        else if (type_pred == 1) /* 4/7/01 */
1471        {
1472            if (imb > 0)  /*left neighbor current frame */
1473            {
1474                pmot = &mot[mbnum-1][0];
1475                mvx[(*num_can)] = (pmot->x) >> 1;
1476                mvy[(*num_can)++] = (pmot->y) >> 1;
1477            }
1478            if (jmb > 0)  /*upper neighbor current frame */
1479            {
1480                pmot = &mot[mbnum-mbwidth][0];
1481                mvx[(*num_can)] = (pmot->x) >> 1;
1482                mvy[(*num_can)++] = (pmot->y) >> 1;
1483            }
1484            if (imb < mbwidth - 1)  /*right neighbor current frame */
1485            {
1486                pmot = &mot[mbnum+1][0];
1487                mvx[(*num_can)] = (pmot->x) >> 1;
1488                mvy[(*num_can)++] = (pmot->y) >> 1;
1489            }
1490            if (jmb < mbheight - 1)  /*bottom neighbor current frame */
1491            {
1492                pmot = &mot[mbnum+mbwidth][0];
1493                mvx[(*num_can)] = (pmot->x) >> 1;
1494                mvy[(*num_can)++] = (pmot->y) >> 1;
1495            }
1496        }
1497//#else
1498        else /* original ST1 algorithm */
1499        {
1500            if (imb > 0)  /*left neighbor current frame */
1501            {
1502                pmot = &mot[mbnum-1][0];
1503                mvx[(*num_can)] = (pmot->x) >> 1;
1504                mvy[(*num_can)++] = (pmot->y) >> 1;
1505
1506                if (jmb > 0)  /*upper-left neighbor current frame */
1507                {
1508                    pmot = &mot[mbnum-mbwidth-1][0];
1509                    mvx[(*num_can)] = (pmot->x) >> 1;
1510                    mvy[(*num_can)++] = (pmot->y) >> 1;
1511                }
1512
1513            }
1514            if (jmb > 0)  /*upper neighbor current frame */
1515            {
1516                pmot = &mot[mbnum-mbwidth][0];
1517                mvx[(*num_can)] = (pmot->x) >> 1;
1518                mvy[(*num_can)++] = (pmot->y) >> 1;
1519
1520                if (imb < mbheight - 1)  /*upper-right neighbor current frame */
1521                {
1522                    pmot = &mot[mbnum-mbwidth+1][0];
1523                    mvx[(*num_can)] = (pmot->x) >> 1;
1524                    mvy[(*num_can)++] = (pmot->y) >> 1;
1525                }
1526            }
1527        }
1528//#endif
1529    }
1530
1531    /* 3/23/01, remove redundant candidate (possible k-mean) */
1532    num1 = *num_can;
1533    *num_can = 1;
1534    for (i = 1; i < num1; i++)
1535    {
1536        same = 0;
1537        j = 0;
1538        while (!same && j < *num_can)
1539        {
1540#if (CANDIDATE_DISTANCE==0)
1541            if (mvx[i] == mvx[j] && mvy[i] == mvy[j])
1542#else
1543            // modified k-mean, 3/24/01, shouldn't be greater than 3
1544            if (PV_ABS(mvx[i] - mvx[j]) + PV_ABS(mvy[i] - mvy[j]) < CANDIDATE_DISTANCE)
1545#endif
1546                same = 1;
1547            j++;
1548        }
1549        if (!same)
1550        {
1551            mvx[*num_can] = mvx[i];
1552            mvy[*num_can] = mvy[i];
1553            (*num_can)++;
1554        }
1555    }
1556
1557#ifdef _SAD_STAT
1558    num_cand += (*num_can);
1559#endif
1560
1561    if (num1 == 5 && *num_can == 1)
1562        *num_can = ALL_CAND_EQUAL; /* all are equal */
1563
1564    return ;
1565}
1566
1567/*===========================================================================
1568    Function:   RasterIntraUpdate
1569    Date:       2/26/01
1570    Purpose:    To raster-scan assign INTRA-update .
1571                N macroblocks are updated (also was programmable).
1572===========================================================================*/
1573void RasterIntraUpdate(UChar *intraArray, UChar *Mode, Int totalMB, Int numRefresh)
1574{
1575    Int indx, i;
1576
1577    /* find the last refresh MB */
1578    indx = 0;
1579    while (intraArray[indx] == 1 && indx < totalMB)
1580        indx++;
1581
1582    /* add more  */
1583    for (i = 0; i < numRefresh && indx < totalMB; i++)
1584    {
1585        Mode[indx] = MODE_INTRA;
1586        intraArray[indx++] = 1;
1587    }
1588
1589    /* if read the end of frame, reset and loop around */
1590    if (indx >= totalMB - 1)
1591    {
1592        ResetIntraUpdate(intraArray, totalMB);
1593        indx = 0;
1594        while (i < numRefresh && indx < totalMB)
1595        {
1596            intraArray[indx] = 1;
1597            Mode[indx++] = MODE_INTRA;
1598            i++;
1599        }
1600    }
1601
1602    return ;
1603}
1604
1605/*===========================================================================
1606    Function:   ResetIntraUpdate
1607    Date:       11/28/00
1608    Purpose:    Reset already intra updated flags to all zero
1609===========================================================================*/
1610
1611void ResetIntraUpdate(UChar *intraArray, Int totalMB)
1612{
1613    M4VENC_MEMSET(intraArray, 0, sizeof(UChar)*totalMB);
1614    return ;
1615}
1616
1617/*===========================================================================
1618    Function:   ResetIntraUpdateRegion
1619    Date:       12/1/00
1620    Purpose:    Reset already intra updated flags in one region to all zero
1621===========================================================================*/
1622void ResetIntraUpdateRegion(UChar *intraArray, Int start_i, Int rwidth,
1623                            Int start_j, Int rheight, Int mbwidth, Int mbheight)
1624{
1625    Int indx, j;
1626
1627    if (start_i + rwidth >= mbwidth)
1628        rwidth = mbwidth - start_i;
1629    if (start_j + rheight >= mbheight)
1630        rheight = mbheight - start_j;
1631
1632    for (j = start_j; j < start_j + rheight; j++)
1633    {
1634        indx = j * mbwidth;
1635        M4VENC_MEMSET(intraArray + indx + start_i, 0, sizeof(UChar)*rwidth);
1636    }
1637
1638    return ;
1639}
1640
1641/*************************************************************
1642    Function:   MoveNeighborSAD
1643    Date:       3/27/01
1644    Purpose:    Move neighboring SAD around when center has shifted
1645*************************************************************/
1646
1647void MoveNeighborSAD(Int dn[], Int new_loc)
1648{
1649    Int tmp[9];
1650    tmp[0] = dn[0];
1651    tmp[1] = dn[1];
1652    tmp[2] = dn[2];
1653    tmp[3] = dn[3];
1654    tmp[4] = dn[4];
1655    tmp[5] = dn[5];
1656    tmp[6] = dn[6];
1657    tmp[7] = dn[7];
1658    tmp[8] = dn[8];
1659    dn[0] = dn[1] = dn[2] = dn[3] = dn[4] = dn[5] = dn[6] = dn[7] = dn[8] = 65536;
1660
1661    switch (new_loc)
1662    {
1663        case 0:
1664            break;
1665        case 1:
1666            dn[4] = tmp[2];
1667            dn[5] = tmp[0];
1668            dn[6] = tmp[8];
1669            break;
1670        case 2:
1671            dn[4] = tmp[3];
1672            dn[5] = tmp[4];
1673            dn[6] = tmp[0];
1674            dn[7] = tmp[8];
1675            dn[8] = tmp[1];
1676            break;
1677        case 3:
1678            dn[6] = tmp[4];
1679            dn[7] = tmp[0];
1680            dn[8] = tmp[2];
1681            break;
1682        case 4:
1683            dn[1] = tmp[2];
1684            dn[2] = tmp[3];
1685            dn[6] = tmp[5];
1686            dn[7] = tmp[6];
1687            dn[8] = tmp[0];
1688            break;
1689        case 5:
1690            dn[1] = tmp[0];
1691            dn[2] = tmp[4];
1692            dn[8] = tmp[6];
1693            break;
1694        case 6:
1695            dn[1] = tmp[8];
1696            dn[2] = tmp[0];
1697            dn[3] = tmp[4];
1698            dn[4] = tmp[5];
1699            dn[8] = tmp[7];
1700            break;
1701        case 7:
1702            dn[2] = tmp[8];
1703            dn[3] = tmp[0];
1704            dn[4] = tmp[6];
1705            break;
1706        case 8:
1707            dn[2] = tmp[1];
1708            dn[3] = tmp[2];
1709            dn[4] = tmp[0];
1710            dn[5] = tmp[6];
1711            dn[6] = tmp[7];
1712            break;
1713    }
1714    dn[0] = tmp[new_loc];
1715
1716    return ;
1717}
1718
1719/* 3/28/01, find minimal of dn[9] */
1720
1721Int FindMin(Int dn[])
1722{
1723    Int min, i;
1724    Int dmin;
1725
1726    dmin = dn[1];
1727    min = 1;
1728    for (i = 2; i < 9; i++)
1729    {
1730        if (dn[i] < dmin)
1731        {
1732            dmin = dn[i];
1733            min = i;
1734        }
1735    }
1736
1737    return min;
1738}
1739
1740
1741
1742