1/******************************************************************************
2 *
3 * Copyright (C) 2015 The Android Open Source Project
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 *****************************************************************************
18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19*/
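/**
 *******************************************************************************
 * @file
 *  ih264e_me.c
 *
 * @brief
 *  Full pel and half pel motion estimation routines for 16x16 macroblocks
 *
 * @author
 *  Ittiam
 *
 * @par List of Functions:
 *  - ime_diamond_search_16x16
 *  - ime_evaluate_init_srchposn_16x16
 *  - ime_full_pel_motion_estimation_16x16
 *  - ime_sub_pel_motion_estimation_16x16
 *  - ime_compute_skip_cost
 *
 * @remarks
 *  None
 *
 *******************************************************************************
 */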
39
40/*****************************************************************************/
41/* File Includes                                                             */
42/*****************************************************************************/
43
44/* System include files */
45#include <stdio.h>
46#include <assert.h>
47#include <limits.h>
48#include <string.h>
49
50/* User include files */
51#include "ime_typedefs.h"
52#include "ime_distortion_metrics.h"
53#include "ime_defs.h"
54#include "ime_structs.h"
55#include "ime.h"
56#include "ime_macros.h"
57#include "ime_statistics.h"
58
59/**
60*******************************************************************************
61*
62* @brief Diamond Search
63*
64* @par Description:
65*  This function computes the sad at vertices of several layers of diamond grid
66*  at a time. The number of layers of diamond grid that would be evaluated is
67*  configurable.The function computes the sad at vertices of a diamond grid. If
68*  the sad at the center of the diamond grid is lesser than the sad at any other
69*  point of the diamond grid, the function marks the candidate Mb partition as
70*  mv.
71*
72* @param[in] ps_mb_part
73*  pointer to current mb partition ctxt with respect to ME
74*
75* @param[in] ps_me_ctxt
76*  pointer to me context
77*
78* @param[in] u4_lambda_motion
79*  lambda motion
80*
81* @param[in] u4_enable_fast_sad
82*  enable/disable fast sad computation
83*
84* @returns  mv pair & corresponding distortion and cost
85*
86* @remarks Diamond Srch, radius is 1
87*
88*******************************************************************************
89*/
90void ime_diamond_search_16x16(me_ctxt_t *ps_me_ctxt, WORD32 i4_reflist)
91{
92    /* MB partition info */
93    mb_part_ctxt *ps_mb_part = &ps_me_ctxt->as_mb_part[i4_reflist];
94
95    /* lagrange parameter */
96    UWORD32 u4_lambda_motion = ps_me_ctxt->u4_lambda_motion;
97
98    /* srch range*/
99    WORD32 i4_srch_range_n = ps_me_ctxt->i4_srch_range_n;
100    WORD32 i4_srch_range_s = ps_me_ctxt->i4_srch_range_s;
101    WORD32 i4_srch_range_e = ps_me_ctxt->i4_srch_range_e;
102    WORD32 i4_srch_range_w = ps_me_ctxt->i4_srch_range_w;
103
104    /* enabled fast sad computation */
105//    UWORD32 u4_enable_fast_sad = ps_me_ctxt->u4_enable_fast_sad;
106
107    /* pointer to src macro block */
108    UWORD8 *pu1_curr_mb = ps_me_ctxt->pu1_src_buf_luma;
109    UWORD8 *pu1_ref_mb = ps_me_ctxt->apu1_ref_buf_luma[i4_reflist];
110
111    /* strides */
112    WORD32 i4_src_strd = ps_me_ctxt->i4_src_strd;
113    WORD32 i4_ref_strd = ps_me_ctxt->i4_rec_strd;
114
115    /* least cost */
116    WORD32 i4_cost_least = ps_mb_part->i4_mb_cost;
117
118    /* least sad */
119    WORD32 i4_distortion_least = ps_mb_part->i4_mb_distortion;
120
121    /* mv pair */
122    WORD16 i2_mvx, i2_mvy;
123
124    /* mv bits */
125    UWORD8 *pu1_mv_bits = ps_me_ctxt->pu1_mv_bits;
126
127    /* temp var */
128    WORD32 i4_cost[4];
129    WORD32 i4_sad[4];
130    UWORD8 *pu1_ref;
131    WORD16 i2_mv_u_x, i2_mv_u_y;
132
133    /* Diamond search Iteration Max Cnt */
134    UWORD32 u4_num_layers = ps_me_ctxt->u4_num_layers;
135
136    /* temp var */
137//    UWORD8 u1_prev_jump = NONE;
138//    UWORD8 u1_curr_jump = NONE;
139//    UWORD8 u1_next_jump;
140//    WORD32 mask_arr[5] = {15, 13, 14, 7, 11};
141//    WORD32 mask;
142//    UWORD8 *apu1_ref[4];
143//    WORD32 i, cnt;
144//    WORD32 dia[4][2] = {{-1, 0}, {1, 0}, {0, -1}, {0, 1}};
145
146    /* mv with best sad during initial evaluation */
147    i2_mvx = ps_mb_part->s_mv_curr.i2_mvx;
148    i2_mvy = ps_mb_part->s_mv_curr.i2_mvy;
149
150    i2_mv_u_x = i2_mvx;
151    i2_mv_u_y = i2_mvy;
152
153    while (u4_num_layers--)
154    {
155        /* FIXME : is this the write way to check for out of bounds ? */
156        if ( (i2_mvx - 1 < i4_srch_range_w) ||
157                        (i2_mvx + 1 > i4_srch_range_e) ||
158                        (i2_mvy - 1 < i4_srch_range_n) ||
159                        (i2_mvy + 1 > i4_srch_range_s) )
160        {
161            break;
162        }
163
164        pu1_ref = pu1_ref_mb + i2_mvx + (i2_mvy * i4_ref_strd);
165
166        ps_me_ctxt->pf_ime_compute_sad4_diamond(pu1_ref,
167                                                pu1_curr_mb,
168                                                i4_ref_strd,
169                                                i4_src_strd,
170                                                i4_sad);
171
172        DEBUG_SAD_HISTOGRAM_ADD(i4_sad[0], 2);
173        DEBUG_SAD_HISTOGRAM_ADD(i4_sad[1], 2);
174        DEBUG_SAD_HISTOGRAM_ADD(i4_sad[2], 2);
175        DEBUG_SAD_HISTOGRAM_ADD(i4_sad[3], 2);
176
177        /* compute cost */
178        i4_cost[0] = i4_sad[0] + u4_lambda_motion * ( pu1_mv_bits[ ((i2_mvx - 1) << 2) - ps_mb_part->s_mv_pred.i2_mvx]
179                                                                   + pu1_mv_bits[(i2_mvy << 2) - ps_mb_part->s_mv_pred.i2_mvy] );
180        i4_cost[1] = i4_sad[1] + u4_lambda_motion * ( pu1_mv_bits[ ((i2_mvx + 1) << 2) - ps_mb_part->s_mv_pred.i2_mvx]
181                                                                   + pu1_mv_bits[(i2_mvy << 2) - ps_mb_part->s_mv_pred.i2_mvy] );
182        i4_cost[2] = i4_sad[2] + u4_lambda_motion * ( pu1_mv_bits[ (i2_mvx << 2) - ps_mb_part->s_mv_pred.i2_mvx]
183                                                                   + pu1_mv_bits[((i2_mvy - 1) << 2) - ps_mb_part->s_mv_pred.i2_mvy] );
184        i4_cost[3] = i4_sad[3] + u4_lambda_motion * ( pu1_mv_bits[ (i2_mvx << 2) - ps_mb_part->s_mv_pred.i2_mvx]
185                                                                   + pu1_mv_bits[((i2_mvy + 1) << 2) - ps_mb_part->s_mv_pred.i2_mvy] );
186
187
188        if (i4_cost_least > i4_cost[0])
189        {
190            i4_cost_least = i4_cost[0];
191            i4_distortion_least = i4_sad[0];
192
193            i2_mv_u_x = (i2_mvx - 1);
194            i2_mv_u_y = i2_mvy;
195        }
196
197        if (i4_cost_least > i4_cost[1])
198        {
199            i4_cost_least = i4_cost[1];
200            i4_distortion_least = i4_sad[1];
201
202            i2_mv_u_x = (i2_mvx + 1);
203            i2_mv_u_y = i2_mvy;
204        }
205
206        if (i4_cost_least > i4_cost[2])
207        {
208            i4_cost_least = i4_cost[2];
209            i4_distortion_least = i4_sad[2];
210
211            i2_mv_u_x = i2_mvx;
212            i2_mv_u_y = i2_mvy - 1;
213        }
214
215        if (i4_cost_least > i4_cost[3])
216        {
217            i4_cost_least = i4_cost[3];
218            i4_distortion_least = i4_sad[3];
219
220            i2_mv_u_x = i2_mvx;
221            i2_mv_u_y = i2_mvy + 1;
222        }
223
224        if( (i2_mv_u_x == i2_mvx) && (i2_mv_u_y == i2_mvy))
225        {
226            ps_mb_part->u4_exit = 1;
227            break;
228        }
229        else
230        {
231            i2_mvx = i2_mv_u_x;
232            i2_mvy = i2_mv_u_y;
233        }
234
235
236    }
237
238    if (i4_cost_least < ps_mb_part->i4_mb_cost)
239    {
240        ps_mb_part->i4_mb_cost = i4_cost_least;
241        ps_mb_part->i4_mb_distortion = i4_distortion_least;
242        ps_mb_part->s_mv_curr.i2_mvx = i2_mvx;
243        ps_mb_part->s_mv_curr.i2_mvy = i2_mvy;
244    }
245
246}
247
248
249/**
250*******************************************************************************
251*
252* @brief This function computes the best motion vector among the tentative mv
253* candidates chosen.
254*
255* @par Description:
256*  This function determines the position in the search window at which the motion
257*  estimation should begin in order to minimise the number of search iterations.
258*
259* @param[in] ps_mb_part
260*  pointer to current mb partition ctxt with respect to ME
261*
262* @param[in] u4_lambda_motion
263*  lambda motion
264*
265* @param[in] u4_fast_flag
266*  enable/disable fast sad computation
267*
268* @returns  mv pair & corresponding distortion and cost
269*
270* @remarks none
271*
272*******************************************************************************
273*/
274
275void ime_evaluate_init_srchposn_16x16
276        (
277            me_ctxt_t *ps_me_ctxt,
278            WORD32 i4_reflist
279        )
280{
281    UWORD32 u4_lambda_motion = ps_me_ctxt->u4_lambda_motion;
282
283    /* candidate mv cnt */
284    UWORD32 u4_num_candidates = ps_me_ctxt->u4_num_candidates[i4_reflist];
285
286    /* list of candidate mvs */
287    ime_mv_t *ps_mv_list = ps_me_ctxt->as_mv_init_search[i4_reflist];
288
289    /* pointer to src macro block */
290    UWORD8 *pu1_curr_mb = ps_me_ctxt->pu1_src_buf_luma;
291    UWORD8 *pu1_ref_mb = ps_me_ctxt->apu1_ref_buf_luma[i4_reflist];
292
293    /* strides */
294    WORD32 i4_src_strd = ps_me_ctxt->i4_src_strd;
295    WORD32 i4_ref_strd = ps_me_ctxt->i4_rec_strd;
296
297    /* enabled fast sad computation */
298    UWORD32 u4_enable_fast_sad = ps_me_ctxt->u4_enable_fast_sad;
299
300    /* SAD(distortion metric) of an 8x8 block */
301    WORD32 i4_mb_distortion;
302
303    /* cost = distortion + u4_lambda_motion * rate */
304    WORD32 i4_mb_cost, i4_mb_cost_least = INT_MAX, i4_distortion_least = INT_MAX;
305
306    /* mb partitions info */
307    mb_part_ctxt *ps_mb_part = &(ps_me_ctxt->as_mb_part[i4_reflist]);
308
309    /* mv bits */
310    UWORD8 *pu1_mv_bits = ps_me_ctxt->pu1_mv_bits;
311
312    /* temp var */
313    UWORD32  i, j;
314    WORD32 i4_srch_pos_idx = 0;
315    UWORD8 *pu1_ref = NULL;
316
317    /* Carry out a search using each of the motion vector pairs identified above as predictors. */
318    /* TODO : Just like Skip, Do we need to add any bias to zero mv as well */
319    for(i = 0; i < u4_num_candidates; i++)
320    {
321        /* compute sad */
322        WORD32 c_sad = 1;
323
324        for(j = 0; j < i; j++ )
325        {
326            if ( (ps_mv_list[i].i2_mvx == ps_mv_list[j].i2_mvx) &&
327                            (ps_mv_list[i].i2_mvy == ps_mv_list[j].i2_mvy) )
328            {
329                c_sad = 0;
330                break;
331            }
332        }
333        if(c_sad)
334        {
335            /* adjust ref pointer */
336            pu1_ref = pu1_ref_mb + ps_mv_list[i].i2_mvx + (ps_mv_list[i].i2_mvy * i4_ref_strd);
337
338            /* compute distortion */
339            ps_me_ctxt->pf_ime_compute_sad_16x16[u4_enable_fast_sad](pu1_curr_mb, pu1_ref, i4_src_strd, i4_ref_strd, i4_mb_cost_least, &i4_mb_distortion);
340
341            DEBUG_SAD_HISTOGRAM_ADD(i4_mb_distortion, 3);
342            /* compute cost */
343            i4_mb_cost = i4_mb_distortion + u4_lambda_motion * ( pu1_mv_bits[ (ps_mv_list[i].i2_mvx << 2) - ps_mb_part->s_mv_pred.i2_mvx]
344                            + pu1_mv_bits[(ps_mv_list[i].i2_mvy << 2) - ps_mb_part->s_mv_pred.i2_mvy] );
345
346            if (i4_mb_cost < i4_mb_cost_least)
347            {
348                i4_mb_cost_least = i4_mb_cost;
349
350                i4_distortion_least = i4_mb_distortion;
351
352                i4_srch_pos_idx = i;
353            }
354        }
355    }
356
357    if (i4_mb_cost_least < ps_mb_part->i4_mb_cost)
358    {
359        ps_mb_part->i4_srch_pos_idx = i4_srch_pos_idx;
360        ps_mb_part->i4_mb_cost = i4_mb_cost_least;
361        ps_mb_part->i4_mb_distortion = i4_distortion_least;
362        ps_mb_part->s_mv_curr.i2_mvx = ps_mv_list[i4_srch_pos_idx].i2_mvx;
363        ps_mb_part->s_mv_curr.i2_mvy = ps_mv_list[i4_srch_pos_idx].i2_mvy;
364    }
365}
366
367/**
368*******************************************************************************
369*
370* @brief Searches for the best matching full pixel predictor within the search
371* range
372*
373* @par Description:
374*  This function begins by computing the mv predict vector for the current mb.
375*  This is used for cost computations. Further basing on the algo. chosen, it
376*  looks through a set of candidate vectors that best represent the mb a least
377*  cost and returns this information.
378*
379* @param[in] ps_proc
380*  pointer to current proc ctxt
381*
382* @param[in] ps_me_ctxt
383*  pointer to me context
384*
385* @returns  mv pair & corresponding distortion and cost
386*
387* @remarks none
388*
389*******************************************************************************
390*/
391void ime_full_pel_motion_estimation_16x16
392    (
393        me_ctxt_t *ps_me_ctxt,
394        WORD32 i4_ref_list
395    )
396{
397    /* mb part info */
398    mb_part_ctxt *ps_mb_part = &ps_me_ctxt->as_mb_part[i4_ref_list];
399
400    /******************************************************************/
401    /* Modify Search range about initial candidate instead of zero mv */
402    /******************************************************************/
403    /*
404     * FIXME: The motion vectors in a way can become unbounded. It may so happen that
405     * MV might exceed the limit of the profile configured.
406     */
407    ps_me_ctxt->i4_srch_range_w = MAX(ps_me_ctxt->i4_srch_range_w,
408                                      -ps_me_ctxt->ai2_srch_boundaries[0] + ps_mb_part->s_mv_curr.i2_mvx);
409    ps_me_ctxt->i4_srch_range_e = MIN(ps_me_ctxt->i4_srch_range_e,
410                                       ps_me_ctxt->ai2_srch_boundaries[0] + ps_mb_part->s_mv_curr.i2_mvx);
411    ps_me_ctxt->i4_srch_range_n = MAX(ps_me_ctxt->i4_srch_range_n,
412                                      -ps_me_ctxt->ai2_srch_boundaries[1] + ps_mb_part->s_mv_curr.i2_mvy);
413    ps_me_ctxt->i4_srch_range_s = MIN(ps_me_ctxt->i4_srch_range_s,
414                                       ps_me_ctxt->ai2_srch_boundaries[1] + ps_mb_part->s_mv_curr.i2_mvy);
415
416    /************************************************************/
417    /* Traverse about best initial candidate for mv             */
418    /************************************************************/
419
420    switch (ps_me_ctxt->u4_me_speed_preset)
421    {
422        case DMND_SRCH:
423            ime_diamond_search_16x16(ps_me_ctxt, i4_ref_list);
424            break;
425        default:
426            assert(0);
427            break;
428    }
429}
430
431/**
432*******************************************************************************
433*
434* @brief Searches for the best matching sub pixel predictor within the search
435* range
436*
437* @par Description:
438*  This function begins by searching across all sub pixel sample points
439*  around the full pel motion vector. The vector with least cost is chosen as
440*  the mv for the current mb. If the skip mode is not evaluated while analysing
441*  the initial search candidates then analyse it here and update the mv.
442*
443* @param[in] ps_proc
444*  pointer to current proc ctxt
445*
446* @param[in] ps_me_ctxt
447*  pointer to me context
448*
449* @returns none
450*
451* @remarks none
452*
453*******************************************************************************
454*/
455void ime_sub_pel_motion_estimation_16x16
456    (
457        me_ctxt_t *ps_me_ctxt,
458        WORD32 i4_reflist
459    )
460{
461    /* pointers to src & ref macro block */
462    UWORD8 *pu1_curr_mb = ps_me_ctxt->pu1_src_buf_luma;
463
464    /* pointers to ref. half pel planes */
465    UWORD8 *pu1_ref_mb_half_x;
466    UWORD8 *pu1_ref_mb_half_y;
467    UWORD8 *pu1_ref_mb_half_xy;
468
469    /* pointers to ref. half pel planes */
470    UWORD8 *pu1_ref_mb_half_x_temp;
471    UWORD8 *pu1_ref_mb_half_y_temp;
472    UWORD8 *pu1_ref_mb_half_xy_temp;
473
474    /* strides */
475    WORD32 i4_src_strd = ps_me_ctxt->i4_src_strd;
476
477    WORD32 i4_ref_strd = ps_me_ctxt->u4_subpel_buf_strd;
478
479    /* mb partitions info */
480    mb_part_ctxt *ps_mb_part = &ps_me_ctxt->as_mb_part[i4_reflist];
481
482    /* SAD(distortion metric) of an mb */
483    WORD32 i4_mb_distortion;
484    WORD32 i4_distortion_least = ps_mb_part->i4_mb_distortion;
485
486    /* cost = distortion + u4_lambda_motion * rate */
487    WORD32 i4_mb_cost;
488    WORD32 i4_mb_cost_least = ps_mb_part->i4_mb_cost;
489
490    /*Best half pel buffer*/
491    UWORD8 *pu1_best_hpel_buf = NULL;
492
493    /* mv bits */
494    UWORD8 *pu1_mv_bits = ps_me_ctxt->pu1_mv_bits;
495
496    /* Motion vectors in full-pel units */
497    WORD16 mv_x, mv_y;
498
499    /* lambda - lagrange constant */
500    UWORD32 u4_lambda_motion = ps_me_ctxt->u4_lambda_motion;
501
502    /* Flags to check if half pel points needs to be evaluated */
503    /**************************************/
504    /* 1 bit for each half pel candidate  */
505    /* bit 0 - half x = 1, half y = 0     */
506    /* bit 1 - half x = -1, half y = 0    */
507    /* bit 2 - half x = 0, half y = 1     */
508    /* bit 3 - half x = 0, half y = -1    */
509    /* bit 4 - half x = 1, half y = 1     */
510    /* bit 5 - half x = -1, half y = 1    */
511    /* bit 6 - half x = 1, half y = -1    */
512    /* bit 7 - half x = -1, half y = -1   */
513    /**************************************/
514    /* temp var */
515    WORD16 i2_mv_u_x, i2_mv_u_y;
516    WORD32 i, j;
517    WORD32 ai4_sad[8];
518
519    WORD32 i4_srch_pos_idx = ps_mb_part->i4_srch_pos_idx;
520
521    i2_mv_u_x = ps_mb_part->s_mv_curr.i2_mvx;
522    i2_mv_u_y = ps_mb_part->s_mv_curr.i2_mvy;
523
524    /************************************************************/
525    /* Evaluate half pel                                        */
526    /************************************************************/
527    mv_x = ps_mb_part->s_mv_curr.i2_mvx >> 2;
528    mv_y = ps_mb_part->s_mv_curr.i2_mvy >> 2;
529
530
531    /**************************************************************/
532    /* ps_me_ctxt->pu1_half_x points to the half pel pixel on the */
533    /* left side of full pel                                      */
534    /* ps_me_ctxt->pu1_half_y points to the half pel pixel on the */
535    /* top  side of full pel                                      */
536    /* ps_me_ctxt->pu1_half_xy points to the half pel pixel       */
537    /* on the top left side of full pel                           */
538    /* for the function pf_ime_sub_pel_compute_sad_16x16 the      */
539    /* default postions are                                       */
540    /* ps_me_ctxt->pu1_half_x = right halp_pel                    */
541    /*  ps_me_ctxt->pu1_half_y = bottom halp_pel                  */
542    /*  ps_me_ctxt->pu1_half_xy = bottom right halp_pel           */
543    /* Hence corresponding adjustments made here                  */
544    /**************************************************************/
545
546    pu1_ref_mb_half_x_temp = pu1_ref_mb_half_x = ps_me_ctxt->apu1_subpel_buffs[0] + 1;
547    pu1_ref_mb_half_y_temp = pu1_ref_mb_half_y = ps_me_ctxt->apu1_subpel_buffs[1] + 1 + i4_ref_strd;
548    pu1_ref_mb_half_xy_temp = pu1_ref_mb_half_xy = ps_me_ctxt->apu1_subpel_buffs[2] + 1 + i4_ref_strd;
549
550    ps_me_ctxt->pf_ime_sub_pel_compute_sad_16x16(pu1_curr_mb, pu1_ref_mb_half_x,
551                                                 pu1_ref_mb_half_y,
552                                                 pu1_ref_mb_half_xy,
553                                                 i4_src_strd, i4_ref_strd,
554                                                 ai4_sad);
555
556    /* Half x plane */
557    for(i = 0; i < 2; i++)
558    {
559        WORD32 mv_x_tmp = (mv_x << 2) + 2;
560        WORD32 mv_y_tmp = (mv_y << 2);
561
562        mv_x_tmp -= (i * 4);
563
564        i4_mb_distortion = ai4_sad[i];
565
566        /* compute cost */
567        i4_mb_cost = i4_mb_distortion + u4_lambda_motion * ( pu1_mv_bits[ mv_x_tmp - ps_mb_part->s_mv_pred.i2_mvx]
568                        + pu1_mv_bits[mv_y_tmp - ps_mb_part->s_mv_pred.i2_mvy] );
569
570        if (i4_mb_cost < i4_mb_cost_least)
571        {
572            i4_mb_cost_least = i4_mb_cost;
573
574            i4_distortion_least = i4_mb_distortion;
575
576            i2_mv_u_x = mv_x_tmp;
577
578            i2_mv_u_y = mv_y_tmp;
579
580#ifndef HP_PL /*choosing whether left or right half_x*/
581            ps_me_ctxt->apu1_subpel_buffs[0] = pu1_ref_mb_half_x_temp - i;
582            pu1_best_hpel_buf = pu1_ref_mb_half_x_temp - i;
583
584            i4_srch_pos_idx = 0;
585#endif
586        }
587
588    }
589
590    /* Half y plane */
591    for(i = 0; i < 2; i++)
592    {
593        WORD32 mv_x_tmp = (mv_x << 2);
594        WORD32 mv_y_tmp = (mv_y << 2) + 2;
595
596        mv_y_tmp -= (i * 4);
597
598        i4_mb_distortion = ai4_sad[2 + i];
599
600        /* compute cost */
601        i4_mb_cost = i4_mb_distortion + u4_lambda_motion * ( pu1_mv_bits[ mv_x_tmp - ps_mb_part->s_mv_pred.i2_mvx]
602                        + pu1_mv_bits[mv_y_tmp - ps_mb_part->s_mv_pred.i2_mvy] );
603
604        if (i4_mb_cost < i4_mb_cost_least)
605        {
606            i4_mb_cost_least = i4_mb_cost;
607
608            i4_distortion_least = i4_mb_distortion;
609
610            i2_mv_u_x = mv_x_tmp;
611
612            i2_mv_u_y = mv_y_tmp;
613
614#ifndef HP_PL/*choosing whether top or bottom half_y*/
615            ps_me_ctxt->apu1_subpel_buffs[1] = pu1_ref_mb_half_y_temp  - i*(i4_ref_strd);
616            pu1_best_hpel_buf = pu1_ref_mb_half_y_temp  - i*(i4_ref_strd);
617
618            i4_srch_pos_idx = 1;
619#endif
620        }
621
622    }
623
624    /* Half xy plane */
625    for(j = 0; j < 2; j++)
626    {
627        for(i = 0; i < 2; i++)
628        {
629            WORD32 mv_x_tmp = (mv_x << 2) + 2;
630            WORD32 mv_y_tmp = (mv_y << 2) + 2;
631
632            mv_x_tmp -= (i * 4);
633            mv_y_tmp -= (j * 4);
634
635            i4_mb_distortion = ai4_sad[4 + i + 2 * j];
636
637            /* compute cost */
638            i4_mb_cost = i4_mb_distortion + u4_lambda_motion * ( pu1_mv_bits[ mv_x_tmp - ps_mb_part->s_mv_pred.i2_mvx]
639                            + pu1_mv_bits[mv_y_tmp - ps_mb_part->s_mv_pred.i2_mvy] );
640
641            if (i4_mb_cost < i4_mb_cost_least)
642            {
643                i4_mb_cost_least = i4_mb_cost;
644
645                i4_distortion_least = i4_mb_distortion;
646
647                i2_mv_u_x = mv_x_tmp;
648
649                i2_mv_u_y = mv_y_tmp;
650
651#ifndef HP_PL /*choosing between four half_xy */
652                ps_me_ctxt->apu1_subpel_buffs[2] = pu1_ref_mb_half_xy_temp  - j*(i4_ref_strd) - i;
653                pu1_best_hpel_buf =  pu1_ref_mb_half_xy_temp  - j*(i4_ref_strd) - i;
654
655                i4_srch_pos_idx = 2;
656#endif
657            }
658
659        }
660    }
661
662    if (i4_mb_cost_least < ps_mb_part->i4_mb_cost)
663    {
664        ps_mb_part->i4_mb_cost = i4_mb_cost_least;
665        ps_mb_part->i4_mb_distortion = i4_distortion_least;
666        ps_mb_part->s_mv_curr.i2_mvx = i2_mv_u_x;
667        ps_mb_part->s_mv_curr.i2_mvy = i2_mv_u_y;
668        ps_mb_part->pu1_best_hpel_buf = pu1_best_hpel_buf;
669        ps_mb_part->i4_srch_pos_idx = i4_srch_pos_idx;
670    }
671}
672
673/**
674*******************************************************************************
675*
676* @brief This function computes cost of skip macroblocks
677*
678* @par Description:
679*
680* @param[in] ps_me_ctxt
681*  pointer to me ctxt
682*
683*
684* @returns  none
685*
686* @remarks
687* NOTE: while computing the skip cost, do not enable early exit from compute
688* sad function because, a negative bias gets added later
689* Note tha the last ME candidate in me ctxt is taken as skip motion vector
690*
691*******************************************************************************
692*/
693void ime_compute_skip_cost
694    (
695         me_ctxt_t *ps_me_ctxt,
696         ime_mv_t *ps_skip_mv,
697         mb_part_ctxt *ps_smb_part_info,
698         UWORD32 u4_use_stat_sad,
699         WORD32 i4_reflist,
700         WORD32 i4_is_slice_type_b
701    )
702{
703
704    /* SAD(distortion metric) of an mb */
705    WORD32 i4_mb_distortion;
706
707    /* cost = distortion + u4_lambda_motion * rate */
708    WORD32 i4_mb_cost;
709
710    /* temp var */
711    UWORD8 *pu1_ref = NULL;
712
713    ime_mv_t s_skip_mv;
714
715    s_skip_mv.i2_mvx = (ps_skip_mv->i2_mvx +2)>>2;
716    s_skip_mv.i2_mvy = (ps_skip_mv->i2_mvy +2)>>2;
717
718    /* Check if the skip mv is out of bounds or subpel */
719    {
720        /* skip mv */
721        ime_mv_t s_clip_skip_mv;
722
723        s_clip_skip_mv.i2_mvx = CLIP3(ps_me_ctxt->i4_srch_range_w, ps_me_ctxt->i4_srch_range_e, s_skip_mv.i2_mvx);
724        s_clip_skip_mv.i2_mvy = CLIP3(ps_me_ctxt->i4_srch_range_n, ps_me_ctxt->i4_srch_range_s, s_skip_mv.i2_mvy);
725
726        if ((s_clip_skip_mv.i2_mvx != s_skip_mv.i2_mvx) ||
727           (s_clip_skip_mv.i2_mvy != s_skip_mv.i2_mvy) ||
728           (ps_skip_mv->i2_mvx & 0x3) ||
729           (ps_skip_mv->i2_mvy & 0x3))
730        {
731            return ;
732        }
733    }
734
735
736    /* adjust ref pointer */
737    pu1_ref = ps_me_ctxt->apu1_ref_buf_luma[i4_reflist] + s_skip_mv.i2_mvx
738                    + (s_skip_mv.i2_mvy * ps_me_ctxt->i4_rec_strd);
739
740    if(u4_use_stat_sad == 1)
741    {
742        UWORD32 u4_is_nonzero;
743
744        ps_me_ctxt->pf_ime_compute_sad_stat_luma_16x16(
745                        ps_me_ctxt->pu1_src_buf_luma, pu1_ref, ps_me_ctxt->i4_src_strd,
746                        ps_me_ctxt->i4_rec_strd, ps_me_ctxt->pu2_sad_thrsh,
747                        &i4_mb_distortion, &u4_is_nonzero);
748
749        if (u4_is_nonzero == 0 || i4_mb_distortion <= ps_me_ctxt->i4_min_sad)
750        {
751            ps_me_ctxt->u4_min_sad_reached = 1; /* found min sad */
752            ps_me_ctxt->i4_min_sad = (u4_is_nonzero == 0) ? 0 : i4_mb_distortion;
753        }
754    }
755    else
756    {
757        ps_me_ctxt->pf_ime_compute_sad_16x16[ps_me_ctxt->u4_enable_fast_sad](
758                        ps_me_ctxt->pu1_src_buf_luma, pu1_ref, ps_me_ctxt->i4_src_strd,
759                        ps_me_ctxt->i4_rec_strd, INT_MAX, &i4_mb_distortion);
760
761        if(i4_mb_distortion <= ps_me_ctxt->i4_min_sad)
762        {
763            ps_me_ctxt->i4_min_sad = i4_mb_distortion;
764            ps_me_ctxt->u4_min_sad_reached = 1; /* found min sad */
765        }
766    }
767
768
769    /* for skip mode cost & distortion are identical
770     * But we shall add a bias to favor skip mode.
771     * Doc. JVT B118 Suggests SKIP_BIAS as 16.
772     * TODO : Empirical analysis of SKIP_BIAS is necessary */
773
774    i4_mb_cost = i4_mb_distortion - (ps_me_ctxt->u4_lambda_motion * (ps_me_ctxt->i4_skip_bias[0] + ps_me_ctxt->i4_skip_bias[1]  * i4_is_slice_type_b));
775
776    if (i4_mb_cost <= ps_smb_part_info->i4_mb_cost)
777    {
778        ps_smb_part_info->i4_mb_cost = i4_mb_cost;
779        ps_smb_part_info->i4_mb_distortion = i4_mb_distortion;
780        ps_smb_part_info->s_mv_curr.i2_mvx = s_skip_mv.i2_mvx;
781        ps_smb_part_info->s_mv_curr.i2_mvy = s_skip_mv.i2_mvy;
782    }
783}
784
785