1/*
2 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 *
4 *  Use of this source code is governed by a BSD-style license
5 *  that can be found in the LICENSE file in the root of the source
6 *  tree. An additional intellectual property rights grant can be found
7 *  in the file PATENTS.  All contributing project authors may
8 *  be found in the AUTHORS file in the root of the source tree.
9 */
10
11
12#include "mcomp.h"
13#include "vpx_mem/vpx_mem.h"
14
15#include <stdio.h>
16#include <limits.h>
17#include <math.h>
18
#ifdef ENTROPY_STATS
// Counters that exist only in ENTROPY_STATS builds.
// NOTE(review): nothing in this part of the file reads or writes them; presumably
// they are updated and reported by statistics code further down - confirm.
static int mv_ref_ct [31] [4] [2];
static int mv_mode_cts [4] [2];
#endif
23
// Lookup table filled by vp8cx_init_mv_bits_sadcost():
// entry i holds floor(sqrt(16 * i)).
static int mv_bits_sadcost[256];

// Populate mv_bits_sadcost[]. Takes no arguments and returns nothing.
//
// The values are the same as the former (int)sqrt(i * 16), but are computed with
// integer arithmetic only: 16 * i grows monotonically with i, so the integer square
// root can simply be advanced from one entry to the next.
void vp8cx_init_mv_bits_sadcost(void)
{
    int i;
    int root = 0;

    for (i = 0; i < 256; i++)
    {
        // advance root to the largest value whose square does not exceed 16 * i
        while ((root + 1) * (root + 1) <= i * 16)
        {
            root++;
        }

        mv_bits_sadcost[i] = root;
    }
}
35
36
37int vp8_mv_bit_cost(MV *mv, MV *ref, int *mvcost[2], int Weight)
38{
39    // MV costing is based on the distribution of vectors in the previous frame and as such will tend to
40    // over state the cost of vectors. In addition coding a new vector can have a knock on effect on the
41    // cost of subsequent vectors and the quality of prediction from NEAR and NEAREST for subsequent blocks.
42    // The "Weight" parameter allows, to a limited extent, for some account to be taken of these factors.
43    return ((mvcost[0][(mv->row - ref->row) >> 1] + mvcost[1][(mv->col - ref->col) >> 1]) * Weight) >> 7;
44}
45
46static int mv_err_cost(MV *mv, MV *ref, int *mvcost[2], int error_per_bit)
47{
48    //int i;
49    //return ((mvcost[0][(mv->row - ref->row)>>1] + mvcost[1][(mv->col - ref->col)>>1] + 128) * error_per_bit) >> 8;
50    //return ( (vp8_mv_bit_cost(mv,  ref, mvcost, 100) + 128) * error_per_bit) >> 8;
51
52    //i = (vp8_mv_bit_cost(mv,  ref, mvcost, 100) * error_per_bit + 128) >> 8;
53    return ((mvcost[0][(mv->row - ref->row) >> 1] + mvcost[1][(mv->col - ref->col) >> 1]) * error_per_bit + 128) >> 8;
54    //return (vp8_mv_bit_cost(mv,  ref, mvcost, 128) * error_per_bit + 128) >> 8;
55}
56
57
58static int mv_bits(MV *mv, MV *ref, int *mvcost[2])
59{
60    // get the estimated number of bits for a motion vector, to be used for costing in SAD based
61    // motion estimation
62    return ((mvcost[0][(mv->row - ref->row) >> 1]  +  mvcost[1][(mv->col - ref->col)>> 1]) + 128) >> 8;
63}
64
65void vp8_init_dsmotion_compensation(MACROBLOCK *x, int stride)
66{
67    int Len;
68    int search_site_count = 0;
69
70
71    // Generate offsets for 4 search sites per step.
72    Len = MAX_FIRST_STEP;
73    x->ss[search_site_count].mv.col = 0;
74    x->ss[search_site_count].mv.row = 0;
75    x->ss[search_site_count].offset = 0;
76    search_site_count++;
77
78    while (Len > 0)
79    {
80
81        // Compute offsets for search sites.
82        x->ss[search_site_count].mv.col = 0;
83        x->ss[search_site_count].mv.row = -Len;
84        x->ss[search_site_count].offset = -Len * stride;
85        search_site_count++;
86
87        // Compute offsets for search sites.
88        x->ss[search_site_count].mv.col = 0;
89        x->ss[search_site_count].mv.row = Len;
90        x->ss[search_site_count].offset = Len * stride;
91        search_site_count++;
92
93        // Compute offsets for search sites.
94        x->ss[search_site_count].mv.col = -Len;
95        x->ss[search_site_count].mv.row = 0;
96        x->ss[search_site_count].offset = -Len;
97        search_site_count++;
98
99        // Compute offsets for search sites.
100        x->ss[search_site_count].mv.col = Len;
101        x->ss[search_site_count].mv.row = 0;
102        x->ss[search_site_count].offset = Len;
103        search_site_count++;
104
105        // Contract.
106        Len /= 2;
107    }
108
109    x->ss_count = search_site_count;
110    x->searches_per_step = 4;
111}
112
113void vp8_init3smotion_compensation(MACROBLOCK *x, int stride)
114{
115    int Len;
116    int search_site_count = 0;
117
118    // Generate offsets for 8 search sites per step.
119    Len = MAX_FIRST_STEP;
120    x->ss[search_site_count].mv.col = 0;
121    x->ss[search_site_count].mv.row = 0;
122    x->ss[search_site_count].offset = 0;
123    search_site_count++;
124
125    while (Len > 0)
126    {
127
128        // Compute offsets for search sites.
129        x->ss[search_site_count].mv.col = 0;
130        x->ss[search_site_count].mv.row = -Len;
131        x->ss[search_site_count].offset = -Len * stride;
132        search_site_count++;
133
134        // Compute offsets for search sites.
135        x->ss[search_site_count].mv.col = 0;
136        x->ss[search_site_count].mv.row = Len;
137        x->ss[search_site_count].offset = Len * stride;
138        search_site_count++;
139
140        // Compute offsets for search sites.
141        x->ss[search_site_count].mv.col = -Len;
142        x->ss[search_site_count].mv.row = 0;
143        x->ss[search_site_count].offset = -Len;
144        search_site_count++;
145
146        // Compute offsets for search sites.
147        x->ss[search_site_count].mv.col = Len;
148        x->ss[search_site_count].mv.row = 0;
149        x->ss[search_site_count].offset = Len;
150        search_site_count++;
151
152        // Compute offsets for search sites.
153        x->ss[search_site_count].mv.col = -Len;
154        x->ss[search_site_count].mv.row = -Len;
155        x->ss[search_site_count].offset = -Len * stride - Len;
156        search_site_count++;
157
158        // Compute offsets for search sites.
159        x->ss[search_site_count].mv.col = Len;
160        x->ss[search_site_count].mv.row = -Len;
161        x->ss[search_site_count].offset = -Len * stride + Len;
162        search_site_count++;
163
164        // Compute offsets for search sites.
165        x->ss[search_site_count].mv.col = -Len;
166        x->ss[search_site_count].mv.row = Len;
167        x->ss[search_site_count].offset = Len * stride - Len;
168        search_site_count++;
169
170        // Compute offsets for search sites.
171        x->ss[search_site_count].mv.col = Len;
172        x->ss[search_site_count].mv.row = Len;
173        x->ss[search_site_count].offset = Len * stride + Len;
174        search_site_count++;
175
176
177        // Contract.
178        Len /= 2;
179    }
180
181    x->ss_count = search_site_count;
182    x->searches_per_step = 8;
183}
184
185
186#define MVC(r,c) (((mvcost[0][(r)-rr] + mvcost[1][(c) - rc]) * error_per_bit + 128 )>>8 ) // estimated cost of a motion vector (r,c)
187#define PRE(r,c) (*(d->base_pre) + d->pre + ((r)>>2) * d->pre_stride + ((c)>>2)) // pointer to predictor base of a motionvector
188#define SP(x) (((x)&3)<<1) // convert motion vector component to offset for svf calc
189#define DIST(r,c) vfp->svf( PRE(r,c), d->pre_stride, SP(c),SP(r), z,b->src_stride,&sse) // returns subpixel variance error function.
190#define IFMVCV(r,c,s,e) if ( c >= minc && c <= maxc && r >= minr && r <= maxr) s else e;
191#define ERR(r,c) (MVC(r,c)+DIST(r,c)) // returns distortion + motion vector cost
192#define CHECK_BETTER(v,r,c) IFMVCV(r,c,{if((v = ERR(r,c)) < besterr) { besterr = v; br=r; bc=c; }}, v=INT_MAX;)// checks if (r,c) has better score than previous best
193#define MIN(x,y) (((x)<(y))?(x):(y))
194#define MAX(x,y) (((x)>(y))?(x):(y))
195
196//#define CHECK_BETTER(v,r,c) if((v = ERR(r,c)) < besterr) { besterr = v; br=r; bc=c; }
197
198int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2])
199{
200    unsigned char *y = *(d->base_pre) + d->pre + (bestmv->row) * d->pre_stride + bestmv->col;
201    unsigned char *z = (*(b->base_src) + b->src);
202
203    int rr = ref_mv->row >> 1, rc = ref_mv->col >> 1;
204    int br = bestmv->row << 2, bc = bestmv->col << 2;
205    int tr = br, tc = bc;
206    unsigned int besterr = INT_MAX;
207    unsigned int left, right, up, down, diag;
208    unsigned int sse;
209    unsigned int whichdir;
210    unsigned int halfiters = 4;
211    unsigned int quarteriters = 4;
212
213    int minc = MAX(x->mv_col_min << 2, (ref_mv->col >> 1) - ((1 << mvlong_width) - 1));
214    int maxc = MIN(x->mv_col_max << 2, (ref_mv->col >> 1) + ((1 << mvlong_width) - 1));
215    int minr = MAX(x->mv_row_min << 2, (ref_mv->row >> 1) - ((1 << mvlong_width) - 1));
216    int maxr = MIN(x->mv_row_max << 2, (ref_mv->row >> 1) + ((1 << mvlong_width) - 1));
217
218    // central mv
219    bestmv->row <<= 3;
220    bestmv->col <<= 3;
221
222    // calculate central point error
223    besterr = vfp->vf(y, d->pre_stride, z, b->src_stride, &sse);
224    besterr += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
225
226    // TODO: Each subsequent iteration checks at least one point in common with the last iteration could be 2 ( if diag selected)
227    while (--halfiters)
228    {
229        // 1/2 pel
230        CHECK_BETTER(left, tr, tc - 2);
231        CHECK_BETTER(right, tr, tc + 2);
232        CHECK_BETTER(up, tr - 2, tc);
233        CHECK_BETTER(down, tr + 2, tc);
234
235        whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
236
237        switch (whichdir)
238        {
239        case 0:
240            CHECK_BETTER(diag, tr - 2, tc - 2);
241            break;
242        case 1:
243            CHECK_BETTER(diag, tr - 2, tc + 2);
244            break;
245        case 2:
246            CHECK_BETTER(diag, tr + 2, tc - 2);
247            break;
248        case 3:
249            CHECK_BETTER(diag, tr + 2, tc + 2);
250            break;
251        }
252
253        // no reason to check the same one again.
254        if (tr == br && tc == bc)
255            break;
256
257        tr = br;
258        tc = bc;
259    }
260
261    // TODO: Each subsequent iteration checks at least one point in common with the last iteration could be 2 ( if diag selected)
262    // 1/4 pel
263    while (--quarteriters)
264    {
265        CHECK_BETTER(left, tr, tc - 1);
266        CHECK_BETTER(right, tr, tc + 1);
267        CHECK_BETTER(up, tr - 1, tc);
268        CHECK_BETTER(down, tr + 1, tc);
269
270        whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
271
272        switch (whichdir)
273        {
274        case 0:
275            CHECK_BETTER(diag, tr - 1, tc - 1);
276            break;
277        case 1:
278            CHECK_BETTER(diag, tr - 1, tc + 1);
279            break;
280        case 2:
281            CHECK_BETTER(diag, tr + 1, tc - 1);
282            break;
283        case 3:
284            CHECK_BETTER(diag, tr + 1, tc + 1);
285            break;
286        }
287
288        // no reason to check the same one again.
289        if (tr == br && tc == bc)
290            break;
291
292        tr = br;
293        tc = bc;
294    }
295
296    bestmv->row = br << 1;
297    bestmv->col = bc << 1;
298
299    if ((abs(bestmv->col - ref_mv->col) > MAX_FULL_PEL_VAL) || (abs(bestmv->row - ref_mv->row) > MAX_FULL_PEL_VAL))
300        return INT_MAX;
301
302    return besterr;
303}
304#undef MVC
305#undef PRE
306#undef SP
307#undef DIST
308#undef ERR
309#undef CHECK_BETTER
310#undef MIN
311#undef MAX
312int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2])
313{
314    int bestmse = INT_MAX;
315    MV startmv;
316    //MV this_mv;
317    MV this_mv;
318    unsigned char *y = *(d->base_pre) + d->pre + (bestmv->row) * d->pre_stride + bestmv->col;
319    unsigned char *z = (*(b->base_src) + b->src);
320    int left, right, up, down, diag;
321    unsigned int sse;
322    int whichdir ;
323
324
325    // Trap uncodable vectors
326    if ((abs((bestmv->col << 3) - ref_mv->col) > MAX_FULL_PEL_VAL) || (abs((bestmv->row << 3) - ref_mv->row) > MAX_FULL_PEL_VAL))
327    {
328        bestmv->row <<= 3;
329        bestmv->col <<= 3;
330        return INT_MAX;
331    }
332
333    // central mv
334    bestmv->row <<= 3;
335    bestmv->col <<= 3;
336    startmv = *bestmv;
337
338    // calculate central point error
339    bestmse = vfp->vf(y, d->pre_stride, z, b->src_stride, &sse);
340    bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
341
342    // go left then right and check error
343    this_mv.row = startmv.row;
344    this_mv.col = ((startmv.col - 8) | 4);
345    left = vfp->svf_halfpix_h(y - 1, d->pre_stride, z, b->src_stride, &sse);
346    left += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
347
348    if (left < bestmse)
349    {
350        *bestmv = this_mv;
351        bestmse = left;
352    }
353
354    this_mv.col += 8;
355    right = vfp->svf_halfpix_h(y, d->pre_stride, z, b->src_stride, &sse);
356    right += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
357
358    if (right < bestmse)
359    {
360        *bestmv = this_mv;
361        bestmse = right;
362    }
363
364    // go up then down and check error
365    this_mv.col = startmv.col;
366    this_mv.row = ((startmv.row - 8) | 4);
367    up = vfp->svf_halfpix_v(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
368    up += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
369
370    if (up < bestmse)
371    {
372        *bestmv = this_mv;
373        bestmse = up;
374    }
375
376    this_mv.row += 8;
377    down = vfp->svf_halfpix_v(y, d->pre_stride, z, b->src_stride, &sse);
378    down += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
379
380    if (down < bestmse)
381    {
382        *bestmv = this_mv;
383        bestmse = down;
384    }
385
386
387    // now check 1 more diagonal
388    whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
389    //for(whichdir =0;whichdir<4;whichdir++)
390    //{
391    this_mv = startmv;
392
393    switch (whichdir)
394    {
395    case 0:
396        this_mv.col = (this_mv.col - 8) | 4;
397        this_mv.row = (this_mv.row - 8) | 4;
398        diag = vfp->svf_halfpix_hv(y - 1 - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
399        break;
400    case 1:
401        this_mv.col += 4;
402        this_mv.row = (this_mv.row - 8) | 4;
403        diag = vfp->svf_halfpix_hv(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
404        break;
405    case 2:
406        this_mv.col = (this_mv.col - 8) | 4;
407        this_mv.row += 4;
408        diag = vfp->svf_halfpix_hv(y - 1, d->pre_stride, z, b->src_stride, &sse);
409        break;
410    case 3:
411    default:
412        this_mv.col += 4;
413        this_mv.row += 4;
414        diag = vfp->svf_halfpix_hv(y, d->pre_stride, z, b->src_stride, &sse);
415        break;
416    }
417
418    diag += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
419
420    if (diag < bestmse)
421    {
422        *bestmv = this_mv;
423        bestmse = diag;
424    }
425
426//  }
427
428
429    // time to check quarter pels.
430    if (bestmv->row < startmv.row)
431        y -= d->pre_stride;
432
433    if (bestmv->col < startmv.col)
434        y--;
435
436    startmv = *bestmv;
437
438
439
440    // go left then right and check error
441    this_mv.row = startmv.row;
442
443    if (startmv.col & 7)
444    {
445        this_mv.col = startmv.col - 2;
446        left = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
447    }
448    else
449    {
450        this_mv.col = (startmv.col - 8) | 6;
451        left = vfp->svf(y - 1, d->pre_stride, 6, this_mv.row & 7, z, b->src_stride, &sse);
452    }
453
454    left += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
455
456    if (left < bestmse)
457    {
458        *bestmv = this_mv;
459        bestmse = left;
460    }
461
462    this_mv.col += 4;
463    right = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
464    right += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
465
466    if (right < bestmse)
467    {
468        *bestmv = this_mv;
469        bestmse = right;
470    }
471
472    // go up then down and check error
473    this_mv.col = startmv.col;
474
475    if (startmv.row & 7)
476    {
477        this_mv.row = startmv.row - 2;
478        up = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
479    }
480    else
481    {
482        this_mv.row = (startmv.row - 8) | 6;
483        up = vfp->svf(y - d->pre_stride, d->pre_stride, this_mv.col & 7, 6, z, b->src_stride, &sse);
484    }
485
486    up += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
487
488    if (up < bestmse)
489    {
490        *bestmv = this_mv;
491        bestmse = up;
492    }
493
494    this_mv.row += 4;
495    down = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
496    down += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
497
498    if (down < bestmse)
499    {
500        *bestmv = this_mv;
501        bestmse = down;
502    }
503
504
505    // now check 1 more diagonal
506    whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
507
508//  for(whichdir=0;whichdir<4;whichdir++)
509//  {
510    this_mv = startmv;
511
512    switch (whichdir)
513    {
514    case 0:
515
516        if (startmv.row & 7)
517        {
518            this_mv.row -= 2;
519
520            if (startmv.col & 7)
521            {
522                this_mv.col -= 2;
523                diag = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
524            }
525            else
526            {
527                this_mv.col = (startmv.col - 8) | 6;
528                diag = vfp->svf(y - 1, d->pre_stride, 6, this_mv.row & 7, z, b->src_stride, &sse);;
529            }
530        }
531        else
532        {
533            this_mv.row = (startmv.row - 8) | 6;
534
535            if (startmv.col & 7)
536            {
537                this_mv.col -= 2;
538                diag = vfp->svf(y - d->pre_stride, d->pre_stride, this_mv.col & 7, 6, z, b->src_stride, &sse);
539            }
540            else
541            {
542                this_mv.col = (startmv.col - 8) | 6;
543                diag = vfp->svf(y - d->pre_stride - 1, d->pre_stride, 6, 6, z, b->src_stride, &sse);
544            }
545        }
546
547        break;
548    case 1:
549        this_mv.col += 2;
550
551        if (startmv.row & 7)
552        {
553            this_mv.row -= 2;
554            diag = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
555        }
556        else
557        {
558            this_mv.row = (startmv.row - 8) | 6;
559            diag = vfp->svf(y - d->pre_stride, d->pre_stride, this_mv.col & 7, 6, z, b->src_stride, &sse);
560        }
561
562        break;
563    case 2:
564        this_mv.row += 2;
565
566        if (startmv.col & 7)
567        {
568            this_mv.col -= 2;
569            diag = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
570        }
571        else
572        {
573            this_mv.col = (startmv.col - 8) | 6;
574            diag = vfp->svf(y - 1, d->pre_stride, 6, this_mv.row & 7, z, b->src_stride, &sse);;
575        }
576
577        break;
578    case 3:
579        this_mv.col += 2;
580        this_mv.row += 2;
581        diag = vfp->svf(y, d->pre_stride,  this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
582        break;
583    }
584
585    diag += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
586
587    if (diag < bestmse)
588    {
589        *bestmv = this_mv;
590        bestmse = diag;
591    }
592
593//  }
594
595    return bestmse;
596}
597
598int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2])
599{
600    int bestmse = INT_MAX;
601    MV startmv;
602    //MV this_mv;
603    MV this_mv;
604    unsigned char *y = *(d->base_pre) + d->pre + (bestmv->row) * d->pre_stride + bestmv->col;
605    unsigned char *z = (*(b->base_src) + b->src);
606    int left, right, up, down, diag;
607    unsigned int sse;
608
609    // Trap uncodable vectors
610    if ((abs((bestmv->col << 3) - ref_mv->col) > MAX_FULL_PEL_VAL) || (abs((bestmv->row << 3) - ref_mv->row) > MAX_FULL_PEL_VAL))
611    {
612        bestmv->row <<= 3;
613        bestmv->col <<= 3;
614        return INT_MAX;
615    }
616
617    // central mv
618    bestmv->row <<= 3;
619    bestmv->col <<= 3;
620    startmv = *bestmv;
621
622    // calculate central point error
623    bestmse = vfp->vf(y, d->pre_stride, z, b->src_stride, &sse);
624    bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
625
626    // go left then right and check error
627    this_mv.row = startmv.row;
628    this_mv.col = ((startmv.col - 8) | 4);
629    left = vfp->svf_halfpix_h(y - 1, d->pre_stride, z, b->src_stride, &sse);
630    left += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
631
632    if (left < bestmse)
633    {
634        *bestmv = this_mv;
635        bestmse = left;
636    }
637
638    this_mv.col += 8;
639    right = vfp->svf_halfpix_h(y, d->pre_stride, z, b->src_stride, &sse);
640    right += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
641
642    if (right < bestmse)
643    {
644        *bestmv = this_mv;
645        bestmse = right;
646    }
647
648    // go up then down and check error
649    this_mv.col = startmv.col;
650    this_mv.row = ((startmv.row - 8) | 4);
651    up = vfp->svf_halfpix_v(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
652    up += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
653
654    if (up < bestmse)
655    {
656        *bestmv = this_mv;
657        bestmse = up;
658    }
659
660    this_mv.row += 8;
661    down = vfp->svf_halfpix_v(y, d->pre_stride, z, b->src_stride, &sse);
662    down += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
663
664    if (down < bestmse)
665    {
666        *bestmv = this_mv;
667        bestmse = down;
668    }
669
670    // somewhat strangely not doing all the diagonals for half pel is slower than doing them.
671#if 0
672    // now check 1 more diagonal -
673    whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
674    this_mv = startmv;
675
676    switch (whichdir)
677    {
678    case 0:
679        this_mv.col = (this_mv.col - 8) | 4;
680        this_mv.row = (this_mv.row - 8) | 4;
681        diag = vfp->svf(y - 1 - d->pre_stride, d->pre_stride, 4, 4, z, b->src_stride, &sse);
682        break;
683    case 1:
684        this_mv.col += 4;
685        this_mv.row = (this_mv.row - 8) | 4;
686        diag = vfp->svf(y - d->pre_stride, d->pre_stride, 4, 4, z, b->src_stride, &sse);
687        break;
688    case 2:
689        this_mv.col = (this_mv.col - 8) | 4;
690        this_mv.row += 4;
691        diag = vfp->svf(y - 1, d->pre_stride, 4, 4, z, b->src_stride, &sse);
692        break;
693    case 3:
694        this_mv.col += 4;
695        this_mv.row += 4;
696        diag = vfp->svf(y, d->pre_stride, 4, 4, z, b->src_stride, &sse);
697        break;
698    }
699
700    diag += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
701
702    if (diag < bestmse)
703    {
704        *bestmv = this_mv;
705        bestmse = diag;
706    }
707
708#else
709    this_mv.col = (this_mv.col - 8) | 4;
710    this_mv.row = (this_mv.row - 8) | 4;
711    diag = vfp->svf_halfpix_hv(y - 1 - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
712    diag += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
713
714    if (diag < bestmse)
715    {
716        *bestmv = this_mv;
717        bestmse = diag;
718    }
719
720    this_mv.col += 8;
721    diag = vfp->svf_halfpix_hv(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
722    diag += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
723
724    if (diag < bestmse)
725    {
726        *bestmv = this_mv;
727        bestmse = diag;
728    }
729
730    this_mv.col = (this_mv.col - 8) | 4;
731    this_mv.row = startmv.row + 4;
732    diag = vfp->svf_halfpix_hv(y - 1, d->pre_stride, z, b->src_stride, &sse);
733    diag += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
734
735    if (diag < bestmse)
736    {
737        *bestmv = this_mv;
738        bestmse = diag;
739    }
740
741    this_mv.col += 8;
742    diag = vfp->svf_halfpix_hv(y, d->pre_stride, z, b->src_stride, &sse);
743    diag += mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
744
745    if (diag < bestmse)
746    {
747        *bestmv = this_mv;
748        bestmse = diag;
749    }
750
751#endif
752    return bestmse;
753}
754
755
756#define MVC(r,c) (((mvsadcost[0][((r)<<2)-rr] + mvsadcost[1][((c)<<2) - rc]) * error_per_bit + 128 )>>8 ) // estimated cost of a motion vector (r,c)
757#define PRE(r,c) (*(d->base_pre) + d->pre + (r) * d->pre_stride + (c)) // pointer to predictor base of a motionvector
758#define DIST(r,c,v) vfp->sdf( src,src_stride,PRE(r,c),d->pre_stride, v) // returns sad error score.
759#define ERR(r,c,v) (MVC(r,c)+DIST(r,c,v)) // returns distortion + motion vector cost
760#define CHECK_BETTER(v,r,c) if ((v = ERR(r,c,besterr)) < besterr) { besterr = v; br=r; bc=c; } // checks if (r,c) has better score than previous best
761static const MV next_chkpts[6][3] =
762{
763    {{ -2, 0}, { -1, -2}, {1, -2}},
764    {{ -1, -2}, {1, -2}, {2, 0}},
765    {{1, -2}, {2, 0}, {1, 2}},
766    {{2, 0}, {1, 2}, { -1, 2}},
767    {{1, 2}, { -1, 2}, { -2, 0}},
768    {{ -1, 2}, { -2, 0}, { -1, -2}}
769};
770int vp8_hex_search
771(
772    MACROBLOCK *x,
773    BLOCK *b,
774    BLOCKD *d,
775    MV *ref_mv,
776    MV *best_mv,
777    int search_param,
778    int error_per_bit,
779    int *num00,
780    const vp8_variance_fn_ptr_t *vfp,
781    int *mvsadcost[2],
782    int *mvcost[2],
783    MV *center_mv
784)
785{
786    MV hex[6] = { { -1, -2}, {1, -2}, {2, 0}, {1, 2}, { -1, 2}, { -2, 0} } ;
787    MV neighbors[8] = { { -1, -1}, {0, -1}, {1, -1}, { -1, 0}, {1, 0}, { -1, 1}, {0, 1}, {1, 1} } ;
788    int i, j;
789    unsigned char *src = (*(b->base_src) + b->src);
790    int src_stride = b->src_stride;
791    int rr = center_mv->row, rc = center_mv->col;
792    int br = ref_mv->row >> 3, bc = ref_mv->col >> 3, tr, tc;
793    unsigned int besterr, thiserr = 0x7fffffff;
794    int k = -1, tk;
795
796    if (bc < x->mv_col_min) bc = x->mv_col_min;
797
798    if (bc > x->mv_col_max) bc = x->mv_col_max;
799
800    if (br < x->mv_row_min) br = x->mv_row_min;
801
802    if (br > x->mv_row_max) br = x->mv_row_max;
803
804    rr >>= 1;
805    rc >>= 1;
806
807    besterr = ERR(br, bc, thiserr);
808
809    // hex search
810    //j=0
811    tr = br;
812    tc = bc;
813
814    for (i = 0; i < 6; i++)
815    {
816        int nr = tr + hex[i].row, nc = tc + hex[i].col;
817
818        if (nc < x->mv_col_min) continue;
819
820        if (nc > x->mv_col_max) continue;
821
822        if (nr < x->mv_row_min) continue;
823
824        if (nr > x->mv_row_max) continue;
825
826        //CHECK_BETTER(thiserr,nr,nc);
827        if ((thiserr = ERR(nr, nc, besterr)) < besterr)
828        {
829            besterr = thiserr;
830            br = nr;
831            bc = nc;
832            k = i;
833        }
834    }
835
836    if (tr == br && tc == bc)
837        goto cal_neighbors;
838
839    for (j = 1; j < 127; j++)
840    {
841        tr = br;
842        tc = bc;
843        tk = k;
844
845        for (i = 0; i < 3; i++)
846        {
847            int nr = tr + next_chkpts[tk][i].row, nc = tc + next_chkpts[tk][i].col;
848
849            if (nc < x->mv_col_min) continue;
850
851            if (nc > x->mv_col_max) continue;
852
853            if (nr < x->mv_row_min) continue;
854
855            if (nr > x->mv_row_max) continue;
856
857            //CHECK_BETTER(thiserr,nr,nc);
858            if ((thiserr = ERR(nr, nc, besterr)) < besterr)
859            {
860                besterr = thiserr;
861                br = nr;
862                bc = nc; //k=(tk+5+i)%6;}
863                k = tk + 5 + i;
864
865                if (k >= 12) k -= 12;
866                else if (k >= 6) k -= 6;
867            }
868        }
869
870        if (tr == br && tc == bc)
871            break;
872    }
873
874    // check 8 1 away neighbors
875cal_neighbors:
876    tr = br;
877    tc = bc;
878
879    for (i = 0; i < 8; i++)
880    {
881        int nr = tr + neighbors[i].row, nc = tc + neighbors[i].col;
882
883        if (nc < x->mv_col_min) continue;
884
885        if (nc > x->mv_col_max) continue;
886
887        if (nr < x->mv_row_min) continue;
888
889        if (nr > x->mv_row_max) continue;
890
891        CHECK_BETTER(thiserr, nr, nc);
892    }
893
894    best_mv->row = br;
895    best_mv->col = bc;
896
897    return vfp->vf(src, src_stride, PRE(br, bc), d->pre_stride, &thiserr) + mv_err_cost(best_mv, center_mv, mvcost, error_per_bit) ;
898}
899#undef MVC
900#undef PRE
901#undef SP
902#undef DIST
903#undef ERR
904#undef CHECK_BETTER
905
906
907int vp8_diamond_search_sad
908(
909    MACROBLOCK *x,
910    BLOCK *b,
911    BLOCKD *d,
912    MV *ref_mv,
913    MV *best_mv,
914    int search_param,
915    int error_per_bit,
916    int *num00,
917    vp8_variance_fn_ptr_t *fn_ptr,
918    int *mvsadcost[2],
919    int *mvcost[2],
920    MV *center_mv
921)
922{
923    int i, j, step;
924
925    unsigned char *what = (*(b->base_src) + b->src);
926    int what_stride = b->src_stride;
927    unsigned char *in_what;
928    int in_what_stride = d->pre_stride;
929    unsigned char *best_address;
930
931    int tot_steps;
932    MV this_mv;
933
934    int bestsad = INT_MAX;
935    int best_site = 0;
936    int last_site = 0;
937
938    int ref_row = ref_mv->row >> 3;
939    int ref_col = ref_mv->col >> 3;
940    int this_row_offset;
941    int this_col_offset;
942    search_site *ss;
943
944    unsigned char *check_here;
945    int thissad;
946
947    *num00 = 0;
948
949    // Work out the start point for the search
950    in_what = (unsigned char *)(*(d->base_pre) + d->pre + (ref_row * (d->pre_stride)) + ref_col);
951    best_address = in_what;
952
953    // We need to check that the starting point for the search (as indicated by ref_mv) is within the buffer limits
954    if ((ref_col > x->mv_col_min) && (ref_col < x->mv_col_max) &&
955    (ref_row > x->mv_row_min) && (ref_row < x->mv_row_max))
956    {
957        // Check the starting position
958        bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff) + mv_err_cost(ref_mv, center_mv, mvsadcost, error_per_bit);
959    }
960
961    // search_param determines the length of the initial step and hence the number of iterations
962    // 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc.
963    ss = &x->ss[search_param * x->searches_per_step];
964    tot_steps = (x->ss_count / x->searches_per_step) - search_param;
965
966    i = 1;
967    best_mv->row = ref_row;
968    best_mv->col = ref_col;
969
970    for (step = 0; step < tot_steps ; step++)
971    {
972        for (j = 0 ; j < x->searches_per_step ; j++)
973        {
974            // Trap illegal vectors
975            this_row_offset = best_mv->row + ss[i].mv.row;
976            this_col_offset = best_mv->col + ss[i].mv.col;
977
978            if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) &&
979            (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max))
980
981            {
982                check_here = ss[i].offset + best_address;
983                thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
984
985                if (thissad < bestsad)
986                {
987                    this_mv.row = this_row_offset << 3;
988                    this_mv.col = this_col_offset << 3;
989                    thissad += mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit);
990
991                    if (thissad < bestsad)
992                    {
993                        bestsad = thissad;
994                        best_site = i;
995                    }
996                }
997            }
998
999            i++;
1000        }
1001
1002        if (best_site != last_site)
1003        {
1004            best_mv->row += ss[best_site].mv.row;
1005            best_mv->col += ss[best_site].mv.col;
1006            best_address += ss[best_site].offset;
1007            last_site = best_site;
1008        }
1009        else if (best_address == in_what)
1010            (*num00)++;
1011    }
1012
1013    this_mv.row = best_mv->row << 3;
1014    this_mv.col = best_mv->col << 3;
1015
1016    if (bestsad == INT_MAX)
1017        return INT_MAX;
1018
1019    return fn_ptr->vf(what, what_stride, best_address, in_what_stride, (unsigned int *)(&thissad))
1020    + mv_err_cost(&this_mv, center_mv, mvcost, error_per_bit);
1021}
1022
1023int vp8_diamond_search_sadx4
1024(
1025    MACROBLOCK *x,
1026    BLOCK *b,
1027    BLOCKD *d,
1028    MV *ref_mv,
1029    MV *best_mv,
1030    int search_param,
1031    int error_per_bit,
1032    int *num00,
1033    vp8_variance_fn_ptr_t *fn_ptr,
1034    int *mvsadcost[2],
1035    int *mvcost[2],
1036    MV *center_mv
1037)
1038{
1039    int i, j, step;
1040
1041    unsigned char *what = (*(b->base_src) + b->src);
1042    int what_stride = b->src_stride;
1043    unsigned char *in_what;
1044    int in_what_stride = d->pre_stride;
1045    unsigned char *best_address;
1046
1047    int tot_steps;
1048    MV this_mv;
1049
1050    int bestsad = INT_MAX;
1051    int best_site = 0;
1052    int last_site = 0;
1053
1054    int ref_row = ref_mv->row >> 3;
1055    int ref_col = ref_mv->col >> 3;
1056    int this_row_offset;
1057    int this_col_offset;
1058    search_site *ss;
1059
1060    unsigned char *check_here;
1061    unsigned int thissad;
1062
1063    *num00 = 0;
1064
1065    // Work out the start point for the search
1066    in_what = (unsigned char *)(*(d->base_pre) + d->pre + (ref_row * (d->pre_stride)) + ref_col);
1067    best_address = in_what;
1068
1069    // We need to check that the starting point for the search (as indicated by ref_mv) is within the buffer limits
1070    if ((ref_col > x->mv_col_min) && (ref_col < x->mv_col_max) &&
1071    (ref_row > x->mv_row_min) && (ref_row < x->mv_row_max))
1072    {
1073        // Check the starting position
1074        bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff) + mv_err_cost(ref_mv, center_mv, mvsadcost, error_per_bit);
1075    }
1076
1077    // search_param determines the length of the initial step and hence the number of iterations
1078    // 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc.
1079    ss = &x->ss[search_param * x->searches_per_step];
1080    tot_steps = (x->ss_count / x->searches_per_step) - search_param;
1081
1082    i = 1;
1083    best_mv->row = ref_row;
1084    best_mv->col = ref_col;
1085
1086    for (step = 0; step < tot_steps ; step++)
1087    {
1088        int all_in = 1, t;
1089
1090        // To know if all neighbor points are within the bounds, 4 bounds checking are enough instead of
1091        // checking 4 bounds for each points.
1092        all_in &= ((best_mv->row + ss[i].mv.row)> x->mv_row_min);
1093        all_in &= ((best_mv->row + ss[i+1].mv.row) < x->mv_row_max);
1094        all_in &= ((best_mv->col + ss[i+2].mv.col) > x->mv_col_min);
1095        all_in &= ((best_mv->col + ss[i+3].mv.col) < x->mv_col_max);
1096
1097        if (all_in)
1098        {
1099            unsigned int sad_array[4];
1100
1101            for (j = 0 ; j < x->searches_per_step ; j += 4)
1102            {
1103                unsigned char *block_offset[4];
1104
1105                for (t = 0; t < 4; t++)
1106                    block_offset[t] = ss[i+t].offset + best_address;
1107
1108                fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, sad_array);
1109
1110                for (t = 0; t < 4; t++, i++)
1111                {
1112                    if (sad_array[t] < bestsad)
1113                    {
1114                        this_mv.row = (best_mv->row + ss[i].mv.row) << 3;
1115                        this_mv.col = (best_mv->col + ss[i].mv.col) << 3;
1116                        sad_array[t] += mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit);
1117
1118                        if (sad_array[t] < bestsad)
1119                        {
1120                            bestsad = sad_array[t];
1121                            best_site = i;
1122                        }
1123                    }
1124                }
1125            }
1126        }
1127        else
1128        {
1129            for (j = 0 ; j < x->searches_per_step ; j++)
1130            {
1131                // Trap illegal vectors
1132                this_row_offset = best_mv->row + ss[i].mv.row;
1133                this_col_offset = best_mv->col + ss[i].mv.col;
1134
1135                if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) &&
1136                (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max))
1137                {
1138                    check_here = ss[i].offset + best_address;
1139                    thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
1140
1141                    if (thissad < bestsad)
1142                    {
1143                        this_mv.row = this_row_offset << 3;
1144                        this_mv.col = this_col_offset << 3;
1145                        thissad += mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit);
1146
1147                        if (thissad < bestsad)
1148                        {
1149                            bestsad = thissad;
1150                            best_site = i;
1151                        }
1152                    }
1153                }
1154                i++;
1155            }
1156        }
1157
1158        if (best_site != last_site)
1159        {
1160            best_mv->row += ss[best_site].mv.row;
1161            best_mv->col += ss[best_site].mv.col;
1162            best_address += ss[best_site].offset;
1163            last_site = best_site;
1164        }
1165        else if (best_address == in_what)
1166            (*num00)++;
1167    }
1168
1169    this_mv.row = best_mv->row << 3;
1170    this_mv.col = best_mv->col << 3;
1171
1172    if (bestsad == INT_MAX)
1173        return INT_MAX;
1174
1175    return fn_ptr->vf(what, what_stride, best_address, in_what_stride, (unsigned int *)(&thissad))
1176    + mv_err_cost(&this_mv, center_mv, mvcost, error_per_bit);
1177}
1178
1179
1180#if !(CONFIG_REALTIME_ONLY)
1181int vp8_full_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int error_per_bit, int distance, vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], int *mvsadcost[2], MV *center_mv)
1182{
1183    unsigned char *what = (*(b->base_src) + b->src);
1184    int what_stride = b->src_stride;
1185    unsigned char *in_what;
1186    int in_what_stride = d->pre_stride;
1187    int mv_stride = d->pre_stride;
1188    unsigned char *bestaddress;
1189    MV *best_mv = &d->bmi.mv.as_mv;
1190    MV this_mv;
1191    int bestsad = INT_MAX;
1192    int r, c;
1193
1194    unsigned char *check_here;
1195    int thissad;
1196
1197    int ref_row = ref_mv->row >> 3;
1198    int ref_col = ref_mv->col >> 3;
1199
1200    int row_min = ref_row - distance;
1201    int row_max = ref_row + distance;
1202    int col_min = ref_col - distance;
1203    int col_max = ref_col + distance;
1204
1205    // Work out the mid point for the search
1206    in_what = *(d->base_pre) + d->pre;
1207    bestaddress = in_what + (ref_row * d->pre_stride) + ref_col;
1208
1209    best_mv->row = ref_row;
1210    best_mv->col = ref_col;
1211
1212    // We need to check that the starting point for the search (as indicated by ref_mv) is within the buffer limits
1213    if ((ref_col > x->mv_col_min) && (ref_col < x->mv_col_max) &&
1214    (ref_row > x->mv_row_min) && (ref_row < x->mv_row_max))
1215    {
1216        // Baseline value at the centre
1217
1218        //bestsad = fn_ptr->sf( what,what_stride,bestaddress,in_what_stride) + (int)sqrt(mv_err_cost(ref_mv,ref_mv, mvcost,error_per_bit*14));
1219        bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride, 0x7fffffff) + mv_err_cost(ref_mv, center_mv, mvsadcost, error_per_bit);
1220    }
1221
1222    // Apply further limits to prevent us looking using vectors that stretch beyiond the UMV border
1223    if (col_min < x->mv_col_min)
1224        col_min = x->mv_col_min;
1225
1226    if (col_max > x->mv_col_max)
1227        col_max = x->mv_col_max;
1228
1229    if (row_min < x->mv_row_min)
1230        row_min = x->mv_row_min;
1231
1232    if (row_max > x->mv_row_max)
1233        row_max = x->mv_row_max;
1234
1235    for (r = row_min; r < row_max ; r++)
1236    {
1237        this_mv.row = r << 3;
1238        check_here = r * mv_stride + in_what + col_min;
1239
1240        for (c = col_min; c < col_max; c++)
1241        {
1242            thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
1243
1244            this_mv.col = c << 3;
1245            //thissad += (int)sqrt(mv_err_cost(&this_mv,ref_mv, mvcost,error_per_bit*14));
1246            //thissad  += error_per_bit * mv_bits_sadcost[mv_bits(&this_mv, ref_mv, mvcost)];
1247            thissad  += mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit); //mv_bits(error_per_bit, &this_mv, ref_mv, mvsadcost);
1248
1249            if (thissad < bestsad)
1250            {
1251                bestsad = thissad;
1252                best_mv->row = r;
1253                best_mv->col = c;
1254                bestaddress = check_here;
1255            }
1256
1257            check_here++;
1258        }
1259    }
1260
1261    this_mv.row = best_mv->row << 3;
1262    this_mv.col = best_mv->col << 3;
1263
1264    if (bestsad < INT_MAX)
1265        return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, (unsigned int *)(&thissad))
1266        + mv_err_cost(&this_mv, center_mv, mvcost, error_per_bit);
1267    else
1268        return INT_MAX;
1269}
1270
1271int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int error_per_bit, int distance, vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], int *mvsadcost[2], MV *center_mv)
1272{
1273    unsigned char *what = (*(b->base_src) + b->src);
1274    int what_stride = b->src_stride;
1275    unsigned char *in_what;
1276    int in_what_stride = d->pre_stride;
1277    int mv_stride = d->pre_stride;
1278    unsigned char *bestaddress;
1279    MV *best_mv = &d->bmi.mv.as_mv;
1280    MV this_mv;
1281    int bestsad = INT_MAX;
1282    int r, c;
1283
1284    unsigned char *check_here;
1285    unsigned int thissad;
1286
1287    int ref_row = ref_mv->row >> 3;
1288    int ref_col = ref_mv->col >> 3;
1289
1290    int row_min = ref_row - distance;
1291    int row_max = ref_row + distance;
1292    int col_min = ref_col - distance;
1293    int col_max = ref_col + distance;
1294
1295    unsigned int sad_array[3];
1296
1297    // Work out the mid point for the search
1298    in_what = *(d->base_pre) + d->pre;
1299    bestaddress = in_what + (ref_row * d->pre_stride) + ref_col;
1300
1301    best_mv->row = ref_row;
1302    best_mv->col = ref_col;
1303
1304    // We need to check that the starting point for the search (as indicated by ref_mv) is within the buffer limits
1305    if ((ref_col > x->mv_col_min) && (ref_col < x->mv_col_max) &&
1306    (ref_row > x->mv_row_min) && (ref_row < x->mv_row_max))
1307    {
1308        // Baseline value at the centre
1309        bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride, 0x7fffffff) + mv_err_cost(ref_mv, center_mv, mvsadcost, error_per_bit);
1310    }
1311
1312    // Apply further limits to prevent us looking using vectors that stretch beyiond the UMV border
1313    if (col_min < x->mv_col_min)
1314        col_min = x->mv_col_min;
1315
1316    if (col_max > x->mv_col_max)
1317        col_max = x->mv_col_max;
1318
1319    if (row_min < x->mv_row_min)
1320        row_min = x->mv_row_min;
1321
1322    if (row_max > x->mv_row_max)
1323        row_max = x->mv_row_max;
1324
1325    for (r = row_min; r < row_max ; r++)
1326    {
1327        this_mv.row = r << 3;
1328        check_here = r * mv_stride + in_what + col_min;
1329        c = col_min;
1330
1331        while ((c + 2) < col_max)
1332        {
1333            int i;
1334
1335            fn_ptr->sdx3f(what, what_stride, check_here , in_what_stride, sad_array);
1336
1337            for (i = 0; i < 3; i++)
1338            {
1339                thissad = sad_array[i];
1340
1341                if (thissad < bestsad)
1342                {
1343                    this_mv.col = c << 3;
1344                    thissad  += mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit);
1345
1346                    if (thissad < bestsad)
1347                    {
1348                        bestsad = thissad;
1349                        best_mv->row = r;
1350                        best_mv->col = c;
1351                        bestaddress = check_here;
1352                    }
1353                }
1354
1355                check_here++;
1356                c++;
1357            }
1358        }
1359
1360        while (c < col_max)
1361        {
1362            thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
1363
1364            if (thissad < bestsad)
1365            {
1366                this_mv.col = c << 3;
1367                thissad  += mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit);
1368
1369                if (thissad < bestsad)
1370                {
1371                    bestsad = thissad;
1372                    best_mv->row = r;
1373                    best_mv->col = c;
1374                    bestaddress = check_here;
1375                }
1376            }
1377
1378            check_here ++;
1379            c ++;
1380        }
1381
1382    }
1383
1384    this_mv.row = best_mv->row << 3;
1385    this_mv.col = best_mv->col << 3;
1386
1387    if (bestsad < INT_MAX)
1388        return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, (unsigned int *)(&thissad))
1389        + mv_err_cost(&this_mv, center_mv, mvcost, error_per_bit);
1390    else
1391        return INT_MAX;
1392}
1393
1394int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int error_per_bit, int distance, vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], int *mvsadcost[2], MV *center_mv)
1395{
1396    unsigned char *what = (*(b->base_src) + b->src);
1397    int what_stride = b->src_stride;
1398    unsigned char *in_what;
1399    int in_what_stride = d->pre_stride;
1400    int mv_stride = d->pre_stride;
1401    unsigned char *bestaddress;
1402    MV *best_mv = &d->bmi.mv.as_mv;
1403    MV this_mv;
1404    int bestsad = INT_MAX;
1405    int r, c;
1406
1407    unsigned char *check_here;
1408    unsigned int thissad;
1409
1410    int ref_row = ref_mv->row >> 3;
1411    int ref_col = ref_mv->col >> 3;
1412
1413    int row_min = ref_row - distance;
1414    int row_max = ref_row + distance;
1415    int col_min = ref_col - distance;
1416    int col_max = ref_col + distance;
1417
1418    DECLARE_ALIGNED_ARRAY(16, unsigned short, sad_array8, 8);
1419    unsigned int sad_array[3];
1420
1421    // Work out the mid point for the search
1422    in_what = *(d->base_pre) + d->pre;
1423    bestaddress = in_what + (ref_row * d->pre_stride) + ref_col;
1424
1425    best_mv->row = ref_row;
1426    best_mv->col = ref_col;
1427
1428    // We need to check that the starting point for the search (as indicated by ref_mv) is within the buffer limits
1429    if ((ref_col > x->mv_col_min) && (ref_col < x->mv_col_max) &&
1430    (ref_row > x->mv_row_min) && (ref_row < x->mv_row_max))
1431    {
1432        // Baseline value at the centre
1433        bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride, 0x7fffffff) + mv_err_cost(ref_mv, center_mv, mvsadcost, error_per_bit);
1434    }
1435
1436    // Apply further limits to prevent us looking using vectors that stretch beyiond the UMV border
1437    if (col_min < x->mv_col_min)
1438        col_min = x->mv_col_min;
1439
1440    if (col_max > x->mv_col_max)
1441        col_max = x->mv_col_max;
1442
1443    if (row_min < x->mv_row_min)
1444        row_min = x->mv_row_min;
1445
1446    if (row_max > x->mv_row_max)
1447        row_max = x->mv_row_max;
1448
1449    for (r = row_min; r < row_max ; r++)
1450    {
1451        this_mv.row = r << 3;
1452        check_here = r * mv_stride + in_what + col_min;
1453        c = col_min;
1454
1455        while ((c + 7) < col_max)
1456        {
1457            int i;
1458
1459            fn_ptr->sdx8f(what, what_stride, check_here , in_what_stride, sad_array8);
1460
1461            for (i = 0; i < 8; i++)
1462            {
1463                thissad = (unsigned int)sad_array8[i];
1464
1465                if (thissad < bestsad)
1466                {
1467                    this_mv.col = c << 3;
1468                    thissad  += mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit);
1469
1470                    if (thissad < bestsad)
1471                    {
1472                        bestsad = thissad;
1473                        best_mv->row = r;
1474                        best_mv->col = c;
1475                        bestaddress = check_here;
1476                    }
1477                }
1478
1479                check_here++;
1480                c++;
1481            }
1482        }
1483
1484        while ((c + 2) < col_max)
1485        {
1486            int i;
1487
1488            fn_ptr->sdx3f(what, what_stride, check_here , in_what_stride, sad_array);
1489
1490            for (i = 0; i < 3; i++)
1491            {
1492                thissad = sad_array[i];
1493
1494                if (thissad < bestsad)
1495                {
1496                    this_mv.col = c << 3;
1497                    thissad  += mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit);
1498
1499                    if (thissad < bestsad)
1500                    {
1501                        bestsad = thissad;
1502                        best_mv->row = r;
1503                        best_mv->col = c;
1504                        bestaddress = check_here;
1505                    }
1506                }
1507
1508                check_here++;
1509                c++;
1510            }
1511        }
1512
1513        while (c < col_max)
1514        {
1515            thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
1516
1517            if (thissad < bestsad)
1518            {
1519                this_mv.col = c << 3;
1520                thissad  += mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit);
1521
1522                if (thissad < bestsad)
1523                {
1524                    bestsad = thissad;
1525                    best_mv->row = r;
1526                    best_mv->col = c;
1527                    bestaddress = check_here;
1528                }
1529            }
1530
1531            check_here ++;
1532            c ++;
1533        }
1534    }
1535
1536    this_mv.row = best_mv->row << 3;
1537    this_mv.col = best_mv->col << 3;
1538
1539    if (bestsad < INT_MAX)
1540        return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, (unsigned int *)(&thissad))
1541        + mv_err_cost(&this_mv, center_mv, mvcost, error_per_bit);
1542    else
1543        return INT_MAX;
1544}
1545#endif /* !(CONFIG_REALTIME_ONLY) */
1546
1547#ifdef ENTROPY_STATS
1548void print_mode_context(void)
1549{
1550    FILE *f = fopen("modecont.c", "w");
1551    int i, j;
1552
1553    fprintf(f, "#include \"entropy.h\"\n");
1554    fprintf(f, "const int vp8_mode_contexts[6][4] =\n");
1555    fprintf(f, "{\n");
1556
1557    for (j = 0; j < 6; j++)
1558    {
1559        fprintf(f, "  { // %d \n", j);
1560        fprintf(f, "    ");
1561
1562        for (i = 0; i < 4; i++)
1563        {
1564            int overal_prob;
1565            int this_prob;
1566            int count; // = mv_ref_ct[j][i][0]+mv_ref_ct[j][i][1];
1567
1568            // Overall probs
1569            count = mv_mode_cts[i][0] + mv_mode_cts[i][1];
1570
1571            if (count)
1572                overal_prob = 256 * mv_mode_cts[i][0] / count;
1573            else
1574                overal_prob = 128;
1575
1576            if (overal_prob == 0)
1577                overal_prob = 1;
1578
1579            // context probs
1580            count = mv_ref_ct[j][i][0] + mv_ref_ct[j][i][1];
1581
1582            if (count)
1583                this_prob = 256 * mv_ref_ct[j][i][0] / count;
1584            else
1585                this_prob = 128;
1586
1587            if (this_prob == 0)
1588                this_prob = 1;
1589
1590            fprintf(f, "%5d, ", this_prob);
1591            //fprintf(f,"%5d, %5d, %8d,", this_prob, overal_prob, (this_prob << 10)/overal_prob);
1592            //fprintf(f,"%8d, ", (this_prob << 10)/overal_prob);
1593        }
1594
1595        fprintf(f, "  },\n");
1596    }
1597
1598    fprintf(f, "};\n");
1599    fclose(f);
1600}
1601
1602/* MV ref count ENTROPY_STATS stats code */
1603#ifdef ENTROPY_STATS
1604void init_mv_ref_counts()
1605{
1606    vpx_memset(mv_ref_ct, 0, sizeof(mv_ref_ct));
1607    vpx_memset(mv_mode_cts, 0, sizeof(mv_mode_cts));
1608}
1609
1610void accum_mv_refs(MB_PREDICTION_MODE m, const int ct[4])
1611{
1612    if (m == ZEROMV)
1613    {
1614        ++mv_ref_ct [ct[0]] [0] [0];
1615        ++mv_mode_cts[0][0];
1616    }
1617    else
1618    {
1619        ++mv_ref_ct [ct[0]] [0] [1];
1620        ++mv_mode_cts[0][1];
1621
1622        if (m == NEARESTMV)
1623        {
1624            ++mv_ref_ct [ct[1]] [1] [0];
1625            ++mv_mode_cts[1][0];
1626        }
1627        else
1628        {
1629            ++mv_ref_ct [ct[1]] [1] [1];
1630            ++mv_mode_cts[1][1];
1631
1632            if (m == NEARMV)
1633            {
1634                ++mv_ref_ct [ct[2]] [2] [0];
1635                ++mv_mode_cts[2][0];
1636            }
1637            else
1638            {
1639                ++mv_ref_ct [ct[2]] [2] [1];
1640                ++mv_mode_cts[2][1];
1641
1642                if (m == NEWMV)
1643                {
1644                    ++mv_ref_ct [ct[3]] [3] [0];
1645                    ++mv_mode_cts[3][0];
1646                }
1647                else
1648                {
1649                    ++mv_ref_ct [ct[3]] [3] [1];
1650                    ++mv_mode_cts[3][1];
1651                }
1652            }
1653        }
1654    }
1655}
1656
1657#endif/* END MV ref count ENTROPY_STATS stats code */
1658
1659#endif
1660