/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "./vp8_rtcd.h"
#include "./vpx_dsp_rtcd.h"
#include "onyx_int.h"
#include "mcomp.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_config.h"
#include <stdio.h>
#include <limits.h>
#include <math.h>
#include "vp8/common/findnearmv.h"
#include "vp8/common/common.h"
#include "vpx_dsp/vpx_dsp_common.h"

#ifdef VP8_ENTROPY_STATS
static int mv_ref_ct[31][4][2];
static int mv_mode_cts[4][2];
#endif

int vp8_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int Weight) {
  /* MV costing is based on the distribution of vectors in the previous
   * frame and as such will tend to overstate the cost of vectors. In
   * addition, coding a new vector can have a knock-on effect on the cost
   * of subsequent vectors and on the quality of prediction from NEAR and
   * NEAREST for subsequent blocks. The "Weight" parameter allows, to a
   * limited extent, for some account to be taken of these factors.
   */
  return ((mvcost[0][(mv->as_mv.row - ref->as_mv.row) >> 1] +
           mvcost[1][(mv->as_mv.col - ref->as_mv.col) >> 1]) *
          Weight) >>
         7;
}

static int mv_err_cost(int_mv *mv, int_mv *ref, int *mvcost[2],
                       int error_per_bit) {
  /* Ignore mv costing if mvcost is NULL */
  if (mvcost) {
    return ((mvcost[0][(mv->as_mv.row - ref->as_mv.row) >> 1] +
             mvcost[1][(mv->as_mv.col - ref->as_mv.col) >> 1]) *
                error_per_bit +
            128) >>
           8;
  }
  return 0;
}

static int mvsad_err_cost(int_mv *mv, int_mv *ref, int *mvsadcost[2],
                          int error_per_bit) {
  /* Calculate the SAD error cost on a full-pixel basis. */
  /* Ignore mv costing if mvsadcost is NULL */
  if (mvsadcost) {
    return ((mvsadcost[0][(mv->as_mv.row - ref->as_mv.row)] +
             mvsadcost[1][(mv->as_mv.col - ref->as_mv.col)]) *
                error_per_bit +
            128) >>
           8;
  }
  return 0;
}

void vp8_init_dsmotion_compensation(MACROBLOCK *x, int stride) {
  int Len;
  int search_site_count = 0;

  /* Generate offsets for 4 search sites per step. */
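  /* The four sites form a diamond (cross) around the current centre: one
   * point above, below, left and right of it at distance Len, with Len
   * halved after every step.
   */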
  Len = MAX_FIRST_STEP;
  x->ss[search_site_count].mv.col = 0;
  x->ss[search_site_count].mv.row = 0;
  x->ss[search_site_count].offset = 0;
  search_site_count++;

  while (Len > 0) {
    /* Compute offsets for search sites. */
    x->ss[search_site_count].mv.col = 0;
    x->ss[search_site_count].mv.row = -Len;
    x->ss[search_site_count].offset = -Len * stride;
    search_site_count++;

    /* Compute offsets for search sites. */
    x->ss[search_site_count].mv.col = 0;
    x->ss[search_site_count].mv.row = Len;
    x->ss[search_site_count].offset = Len * stride;
    search_site_count++;

    /* Compute offsets for search sites. */
    x->ss[search_site_count].mv.col = -Len;
    x->ss[search_site_count].mv.row = 0;
    x->ss[search_site_count].offset = -Len;
    search_site_count++;

    /* Compute offsets for search sites. */
    x->ss[search_site_count].mv.col = Len;
    x->ss[search_site_count].mv.row = 0;
    x->ss[search_site_count].offset = Len;
    search_site_count++;

    /* Contract. */
    Len /= 2;
  }

  x->ss_count = search_site_count;
  x->searches_per_step = 4;
}

void vp8_init3smotion_compensation(MACROBLOCK *x, int stride) {
  int Len;
  int search_site_count = 0;

  /* Generate offsets for 8 search sites per step. */
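  /* The eight sites form a square around the current centre: the four
   * diamond points above plus the four diagonals at (+/-Len, +/-Len),
   * again halving Len after every step.
   */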
  Len = MAX_FIRST_STEP;
  x->ss[search_site_count].mv.col = 0;
  x->ss[search_site_count].mv.row = 0;
  x->ss[search_site_count].offset = 0;
  search_site_count++;

  while (Len > 0) {
    /* Compute offsets for search sites. */
    x->ss[search_site_count].mv.col = 0;
    x->ss[search_site_count].mv.row = -Len;
    x->ss[search_site_count].offset = -Len * stride;
    search_site_count++;

    /* Compute offsets for search sites. */
    x->ss[search_site_count].mv.col = 0;
    x->ss[search_site_count].mv.row = Len;
    x->ss[search_site_count].offset = Len * stride;
    search_site_count++;

    /* Compute offsets for search sites. */
    x->ss[search_site_count].mv.col = -Len;
    x->ss[search_site_count].mv.row = 0;
    x->ss[search_site_count].offset = -Len;
    search_site_count++;

    /* Compute offsets for search sites. */
    x->ss[search_site_count].mv.col = Len;
    x->ss[search_site_count].mv.row = 0;
    x->ss[search_site_count].offset = Len;
    search_site_count++;

    /* Compute offsets for search sites. */
    x->ss[search_site_count].mv.col = -Len;
    x->ss[search_site_count].mv.row = -Len;
    x->ss[search_site_count].offset = -Len * stride - Len;
    search_site_count++;

    /* Compute offsets for search sites. */
    x->ss[search_site_count].mv.col = Len;
    x->ss[search_site_count].mv.row = -Len;
    x->ss[search_site_count].offset = -Len * stride + Len;
    search_site_count++;

    /* Compute offsets for search sites. */
    x->ss[search_site_count].mv.col = -Len;
    x->ss[search_site_count].mv.row = Len;
    x->ss[search_site_count].offset = Len * stride - Len;
    search_site_count++;

    /* Compute offsets for search sites. */
    x->ss[search_site_count].mv.col = Len;
    x->ss[search_site_count].mv.row = Len;
    x->ss[search_site_count].offset = Len * stride + Len;
    search_site_count++;

    /* Contract. */
    Len /= 2;
  }

  x->ss_count = search_site_count;
  x->searches_per_step = 8;
}

/*
 * To avoid the penalty for crossing cache-line reads, preload the reference
 * area into a small buffer that is aligned so that no read from it crosses a
 * cache line. This reduces the CPU cycles spent reading ref data in the
 * sub-pixel filter functions.
 * TODO: Currently, since the sub-pixel search range here is -3 ~ 3, copying a
 * 22 row x 32 col area is enough for a 16x16 macroblock. Later, for SPLITMV,
 * we could reduce the area.
 */

/* estimated cost of a motion vector (r,c) */
#define MVC(r, c)                                                             \
  (mvcost                                                                     \
       ? ((mvcost[0][(r)-rr] + mvcost[1][(c)-rc]) * error_per_bit + 128) >> 8 \
       : 0)
/* pointer to predictor base of a motion vector */
#define PRE(r, c) (y + (((r) >> 2) * y_stride + ((c) >> 2) - (offset)))
/* convert motion vector component to offset for svf calc */
#define SP(x) (((x)&3) << 1)
/* returns subpixel variance error function. */
#define DIST(r, c) \
  vfp->svf(PRE(r, c), y_stride, SP(c), SP(r), z, b->src_stride, &sse)
#define IFMVCV(r, c, s, e) \
  if (c >= minc && c <= maxc && r >= minr && r <= maxr) s else e;
/* returns distortion + motion vector cost */
#define ERR(r, c) (MVC(r, c) + DIST(r, c))
/* checks if (r,c) has better score than previous best */
#define CHECK_BETTER(v, r, c)                           \
  IFMVCV(r, c,                                          \
         {                                              \
           thismse = DIST(r, c);                        \
           if ((v = (MVC(r, c) + thismse)) < besterr) { \
             besterr = v;                               \
             br = r;                                    \
             bc = c;                                    \
             *distortion = thismse;                     \
             *sse1 = sse;                               \
           }                                            \
         },                                             \
         v = UINT_MAX;)

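/* Iterative sub-pixel refinement. Starting from the best full-pel position,
 * each half-pel iteration evaluates the left/right/up/down neighbours plus
 * the most promising diagonal and re-centres whenever the combined error
 * (sub-pixel variance + motion vector cost) improves; the same loop is then
 * repeated at quarter-pel resolution.
 */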
int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
                                             int_mv *bestmv, int_mv *ref_mv,
                                             int error_per_bit,
                                             const vp8_variance_fn_ptr_t *vfp,
                                             int *mvcost[2], int *distortion,
                                             unsigned int *sse1) {
  unsigned char *z = (*(b->base_src) + b->src);

  int rr = ref_mv->as_mv.row >> 1, rc = ref_mv->as_mv.col >> 1;
  int br = bestmv->as_mv.row * 4, bc = bestmv->as_mv.col * 4;
  int tr = br, tc = bc;
  unsigned int besterr;
  unsigned int left, right, up, down, diag;
  unsigned int sse;
  unsigned int whichdir;
  unsigned int halfiters = 4;
  unsigned int quarteriters = 4;
  int thismse;

  int minc = VPXMAX(x->mv_col_min * 4,
                    (ref_mv->as_mv.col >> 1) - ((1 << mvlong_width) - 1));
  int maxc = VPXMIN(x->mv_col_max * 4,
                    (ref_mv->as_mv.col >> 1) + ((1 << mvlong_width) - 1));
  int minr = VPXMAX(x->mv_row_min * 4,
                    (ref_mv->as_mv.row >> 1) - ((1 << mvlong_width) - 1));
  int maxr = VPXMIN(x->mv_row_max * 4,
                    (ref_mv->as_mv.row >> 1) + ((1 << mvlong_width) - 1));

  int y_stride;
  int offset;
  int pre_stride = x->e_mbd.pre.y_stride;
  unsigned char *base_pre = x->e_mbd.pre.y_buffer;

#if ARCH_X86 || ARCH_X86_64
  MACROBLOCKD *xd = &x->e_mbd;
  unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
                       bestmv->as_mv.col;
  unsigned char *y;
  int buf_r1, buf_r2, buf_c1;

  /* Clamping to avoid out-of-range data access */
  buf_r1 = ((bestmv->as_mv.row - 3) < x->mv_row_min)
               ? (bestmv->as_mv.row - x->mv_row_min)
               : 3;
  buf_r2 = ((bestmv->as_mv.row + 3) > x->mv_row_max)
               ? (x->mv_row_max - bestmv->as_mv.row)
               : 3;
  buf_c1 = ((bestmv->as_mv.col - 3) < x->mv_col_min)
               ? (bestmv->as_mv.col - x->mv_col_min)
               : 3;
  y_stride = 32;

  /* Copy to intermediate buffer before searching. */
  vfp->copymem(y_0 - buf_c1 - pre_stride * buf_r1, pre_stride, xd->y_buf,
               y_stride, 16 + buf_r1 + buf_r2);
  y = xd->y_buf + y_stride * buf_r1 + buf_c1;
#else
  unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
                     bestmv->as_mv.col;
  y_stride = pre_stride;
#endif

  offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col;

  /* central mv */
  bestmv->as_mv.row *= 8;
  bestmv->as_mv.col *= 8;

  /* calculate central point error */
  besterr = vfp->vf(y, y_stride, z, b->src_stride, sse1);
  *distortion = besterr;
  besterr += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);

  /* TODO: Each subsequent iteration checks at least one point in common
   * with the last iteration (two if the diagonal was selected).
   */
  while (--halfiters) {
    /* 1/2 pel */
    CHECK_BETTER(left, tr, tc - 2);
    CHECK_BETTER(right, tr, tc + 2);
    CHECK_BETTER(up, tr - 2, tc);
    CHECK_BETTER(down, tr + 2, tc);

    whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);

    switch (whichdir) {
      case 0: CHECK_BETTER(diag, tr - 2, tc - 2); break;
      case 1: CHECK_BETTER(diag, tr - 2, tc + 2); break;
      case 2: CHECK_BETTER(diag, tr + 2, tc - 2); break;
      case 3: CHECK_BETTER(diag, tr + 2, tc + 2); break;
    }

    /* no reason to check the same one again. */
    if (tr == br && tc == bc) break;

    tr = br;
    tc = bc;
  }

  /* TODO: Each subsequent iteration checks at least one point in common
   * with the last iteration (two if the diagonal was selected).
   */

  /* 1/4 pel */
  while (--quarteriters) {
    CHECK_BETTER(left, tr, tc - 1);
    CHECK_BETTER(right, tr, tc + 1);
    CHECK_BETTER(up, tr - 1, tc);
    CHECK_BETTER(down, tr + 1, tc);

    whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);

    switch (whichdir) {
      case 0: CHECK_BETTER(diag, tr - 1, tc - 1); break;
      case 1: CHECK_BETTER(diag, tr - 1, tc + 1); break;
      case 2: CHECK_BETTER(diag, tr + 1, tc - 1); break;
      case 3: CHECK_BETTER(diag, tr + 1, tc + 1); break;
    }

    /* no reason to check the same one again. */
    if (tr == br && tc == bc) break;

    tr = br;
    tc = bc;
  }

  bestmv->as_mv.row = br * 2;
  bestmv->as_mv.col = bc * 2;

  if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > (MAX_FULL_PEL_VAL << 3)) ||
      (abs(bestmv->as_mv.row - ref_mv->as_mv.row) > (MAX_FULL_PEL_VAL << 3))) {
    return INT_MAX;
  }

  return besterr;
}
#undef MVC
#undef PRE
#undef SP
#undef DIST
#undef IFMVCV
#undef ERR
#undef CHECK_BETTER

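/* Non-iterative sub-pixel search: check the four half-pel neighbours and one
 * diagonal around the full-pel best, then repeat the same pattern once at
 * quarter-pel resolution around the half-pel winner.
 */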
int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
                                 int_mv *bestmv, int_mv *ref_mv,
                                 int error_per_bit,
                                 const vp8_variance_fn_ptr_t *vfp,
                                 int *mvcost[2], int *distortion,
                                 unsigned int *sse1) {
  int bestmse = INT_MAX;
  int_mv startmv;
  int_mv this_mv;
  unsigned char *z = (*(b->base_src) + b->src);
  int left, right, up, down, diag;
  unsigned int sse;
  int whichdir;
  int thismse;
  int y_stride;
  int pre_stride = x->e_mbd.pre.y_stride;
  unsigned char *base_pre = x->e_mbd.pre.y_buffer;

#if ARCH_X86 || ARCH_X86_64
  MACROBLOCKD *xd = &x->e_mbd;
  unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
                       bestmv->as_mv.col;
  unsigned char *y;

  y_stride = 32;
  /* Copy 18 rows x 32 cols area to intermediate buffer before searching. */
  vfp->copymem(y_0 - 1 - pre_stride, pre_stride, xd->y_buf, y_stride, 18);
  y = xd->y_buf + y_stride + 1;
#else
  unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
                     bestmv->as_mv.col;
  y_stride = pre_stride;
#endif

  /* central mv */
  bestmv->as_mv.row *= 8;
  bestmv->as_mv.col *= 8;
  startmv = *bestmv;

  /* calculate central point error */
  bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1);
  *distortion = bestmse;
  bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);

  /* go left then right and check error */
  this_mv.as_mv.row = startmv.as_mv.row;
  this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4);
  /* "halfpix" horizontal variance */
  thismse = vfp->svf(y - 1, y_stride, 4, 0, z, b->src_stride, &sse);
  left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (left < bestmse) {
    *bestmv = this_mv;
    bestmse = left;
    *distortion = thismse;
    *sse1 = sse;
  }

  this_mv.as_mv.col += 8;
  /* "halfpix" horizontal variance */
  thismse = vfp->svf(y, y_stride, 4, 0, z, b->src_stride, &sse);
  right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (right < bestmse) {
    *bestmv = this_mv;
    bestmse = right;
    *distortion = thismse;
    *sse1 = sse;
  }

  /* go up then down and check error */
  this_mv.as_mv.col = startmv.as_mv.col;
  this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4);
  /* "halfpix" vertical variance */
  thismse = vfp->svf(y - y_stride, y_stride, 0, 4, z, b->src_stride, &sse);
  up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (up < bestmse) {
    *bestmv = this_mv;
    bestmse = up;
    *distortion = thismse;
    *sse1 = sse;
  }

  this_mv.as_mv.row += 8;
  /* "halfpix" vertical variance */
  thismse = vfp->svf(y, y_stride, 0, 4, z, b->src_stride, &sse);
  down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (down < bestmse) {
    *bestmv = this_mv;
    bestmse = down;
    *distortion = thismse;
    *sse1 = sse;
  }

  /* now check 1 more diagonal */
  whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
  this_mv = startmv;

  switch (whichdir) {
    case 0:
      this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
      this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
      /* "halfpix" horizontal/vertical variance */
      thismse =
          vfp->svf(y - 1 - y_stride, y_stride, 4, 4, z, b->src_stride, &sse);
      break;
    case 1:
      this_mv.as_mv.col += 4;
      this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
      /* "halfpix" horizontal/vertical variance */
      thismse = vfp->svf(y - y_stride, y_stride, 4, 4, z, b->src_stride, &sse);
      break;
    case 2:
      this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
      this_mv.as_mv.row += 4;
      /* "halfpix" horizontal/vertical variance */
      thismse = vfp->svf(y - 1, y_stride, 4, 4, z, b->src_stride, &sse);
      break;
    case 3:
    default:
      this_mv.as_mv.col += 4;
      this_mv.as_mv.row += 4;
      /* "halfpix" horizontal/vertical variance */
      thismse = vfp->svf(y, y_stride, 4, 4, z, b->src_stride, &sse);
      break;
  }

  diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (diag < bestmse) {
    *bestmv = this_mv;
    bestmse = diag;
    *distortion = thismse;
    *sse1 = sse;
  }

  /* time to check quarter pels. */
  if (bestmv->as_mv.row < startmv.as_mv.row) y -= y_stride;

  if (bestmv->as_mv.col < startmv.as_mv.col) y--;

  startmv = *bestmv;

  /* go left then right and check error */
  this_mv.as_mv.row = startmv.as_mv.row;

  if (startmv.as_mv.col & 7) {
    this_mv.as_mv.col = startmv.as_mv.col - 2;
    thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7,
                       this_mv.as_mv.row & 7, z, b->src_stride, &sse);
  } else {
    this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
    thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z,
                       b->src_stride, &sse);
  }

  left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (left < bestmse) {
    *bestmv = this_mv;
    bestmse = left;
    *distortion = thismse;
    *sse1 = sse;
  }

  this_mv.as_mv.col += 4;
  thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7,
                     z, b->src_stride, &sse);
  right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (right < bestmse) {
    *bestmv = this_mv;
    bestmse = right;
    *distortion = thismse;
    *sse1 = sse;
  }

  /* go up then down and check error */
  this_mv.as_mv.col = startmv.as_mv.col;

  if (startmv.as_mv.row & 7) {
    this_mv.as_mv.row = startmv.as_mv.row - 2;
    thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7,
                       this_mv.as_mv.row & 7, z, b->src_stride, &sse);
  } else {
    this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
    thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6, z,
                       b->src_stride, &sse);
  }

  up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (up < bestmse) {
    *bestmv = this_mv;
    bestmse = up;
    *distortion = thismse;
    *sse1 = sse;
  }

  this_mv.as_mv.row += 4;
  thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7,
                     z, b->src_stride, &sse);
  down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (down < bestmse) {
    *bestmv = this_mv;
    bestmse = down;
    *distortion = thismse;
    *sse1 = sse;
  }

  /* now check 1 more diagonal */
  whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);

  this_mv = startmv;

  switch (whichdir) {
    case 0:

      if (startmv.as_mv.row & 7) {
        this_mv.as_mv.row -= 2;

        if (startmv.as_mv.col & 7) {
          this_mv.as_mv.col -= 2;
          thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7,
                             this_mv.as_mv.row & 7, z, b->src_stride, &sse);
        } else {
          this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
          thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z,
                             b->src_stride, &sse);
        }
      } else {
        this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;

        if (startmv.as_mv.col & 7) {
          this_mv.as_mv.col -= 2;
          thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6,
                             z, b->src_stride, &sse);
        } else {
          this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
          thismse = vfp->svf(y - y_stride - 1, y_stride, 6, 6, z, b->src_stride,
                             &sse);
        }
      }

      break;
    case 1:
      this_mv.as_mv.col += 2;

      if (startmv.as_mv.row & 7) {
        this_mv.as_mv.row -= 2;
        thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7,
                           this_mv.as_mv.row & 7, z, b->src_stride, &sse);
      } else {
        this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
        thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6, z,
                           b->src_stride, &sse);
      }

      break;
    case 2:
      this_mv.as_mv.row += 2;

      if (startmv.as_mv.col & 7) {
        this_mv.as_mv.col -= 2;
        thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7,
                           this_mv.as_mv.row & 7, z, b->src_stride, &sse);
      } else {
        this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
        thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z,
                           b->src_stride, &sse);
      }

      break;
    case 3:
      this_mv.as_mv.col += 2;
      this_mv.as_mv.row += 2;
      thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7,
                         this_mv.as_mv.row & 7, z, b->src_stride, &sse);
      break;
  }

  diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (diag < bestmse) {
    *bestmv = this_mv;
    bestmse = diag;
    *distortion = thismse;
    *sse1 = sse;
  }

  return bestmse;
}

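/* As vp8_find_best_sub_pixel_step, but the refinement stops after the
 * half-pel stage.
 */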
int vp8_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
                                  int_mv *bestmv, int_mv *ref_mv,
                                  int error_per_bit,
                                  const vp8_variance_fn_ptr_t *vfp,
                                  int *mvcost[2], int *distortion,
                                  unsigned int *sse1) {
  int bestmse = INT_MAX;
  int_mv startmv;
  int_mv this_mv;
  unsigned char *z = (*(b->base_src) + b->src);
  int left, right, up, down, diag;
  unsigned int sse;
  int whichdir;
  int thismse;
  int y_stride;
  int pre_stride = x->e_mbd.pre.y_stride;
  unsigned char *base_pre = x->e_mbd.pre.y_buffer;

#if ARCH_X86 || ARCH_X86_64
  MACROBLOCKD *xd = &x->e_mbd;
  unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
                       bestmv->as_mv.col;
  unsigned char *y;

  y_stride = 32;
  /* Copy 18 rows x 32 cols area to intermediate buffer before searching. */
  vfp->copymem(y_0 - 1 - pre_stride, pre_stride, xd->y_buf, y_stride, 18);
  y = xd->y_buf + y_stride + 1;
#else
  unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
                     bestmv->as_mv.col;
  y_stride = pre_stride;
#endif

  /* central mv */
  bestmv->as_mv.row *= 8;
  bestmv->as_mv.col *= 8;
  startmv = *bestmv;

  /* calculate central point error */
  bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1);
  *distortion = bestmse;
  bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);

  /* go left then right and check error */
  this_mv.as_mv.row = startmv.as_mv.row;
  this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4);
  /* "halfpix" horizontal variance */
  thismse = vfp->svf(y - 1, y_stride, 4, 0, z, b->src_stride, &sse);
  left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (left < bestmse) {
    *bestmv = this_mv;
    bestmse = left;
    *distortion = thismse;
    *sse1 = sse;
  }

  this_mv.as_mv.col += 8;
  /* "halfpix" horizontal variance */
  thismse = vfp->svf(y, y_stride, 4, 0, z, b->src_stride, &sse);
  right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (right < bestmse) {
    *bestmv = this_mv;
    bestmse = right;
    *distortion = thismse;
    *sse1 = sse;
  }

  /* go up then down and check error */
  this_mv.as_mv.col = startmv.as_mv.col;
  this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4);
  /* "halfpix" vertical variance */
  thismse = vfp->svf(y - y_stride, y_stride, 0, 4, z, b->src_stride, &sse);
  up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (up < bestmse) {
    *bestmv = this_mv;
    bestmse = up;
    *distortion = thismse;
    *sse1 = sse;
  }

  this_mv.as_mv.row += 8;
  /* "halfpix" vertical variance */
  thismse = vfp->svf(y, y_stride, 0, 4, z, b->src_stride, &sse);
  down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (down < bestmse) {
    *bestmv = this_mv;
    bestmse = down;
    *distortion = thismse;
    *sse1 = sse;
  }

  /* now check 1 more diagonal */
  whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
  this_mv = startmv;

  switch (whichdir) {
    case 0:
      this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
      this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
      /* "halfpix" horizontal/vertical variance */
      thismse =
          vfp->svf(y - 1 - y_stride, y_stride, 4, 4, z, b->src_stride, &sse);
      break;
    case 1:
      this_mv.as_mv.col += 4;
      this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
      /* "halfpix" horizontal/vertical variance */
      thismse = vfp->svf(y - y_stride, y_stride, 4, 4, z, b->src_stride, &sse);
      break;
    case 2:
      this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
      this_mv.as_mv.row += 4;
      /* "halfpix" horizontal/vertical variance */
      thismse = vfp->svf(y - 1, y_stride, 4, 4, z, b->src_stride, &sse);
      break;
    case 3:
    default:
      this_mv.as_mv.col += 4;
      this_mv.as_mv.row += 4;
      /* "halfpix" horizontal/vertical variance */
      thismse = vfp->svf(y, y_stride, 4, 4, z, b->src_stride, &sse);
      break;
  }

  diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);

  if (diag < bestmse) {
    *bestmv = this_mv;
    bestmse = diag;
    *distortion = thismse;
    *sse1 = sse;
  }

  return bestmse;
}

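/* Set all_in when every point within +/-range of the current centre (br,bc)
 * lies inside the motion vector limits, so per-point clipping can be skipped.
 */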
#define CHECK_BOUNDS(range)                    \
  {                                            \
    all_in = 1;                                \
    all_in &= ((br - range) >= x->mv_row_min); \
    all_in &= ((br + range) <= x->mv_row_max); \
    all_in &= ((bc - range) >= x->mv_col_min); \
    all_in &= ((bc + range) <= x->mv_col_max); \
  }

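/* Skip (via continue) any candidate vector outside the allowed MV range. */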
#define CHECK_POINT                                  \
  {                                                  \
    if (this_mv.as_mv.col < x->mv_col_min) continue; \
    if (this_mv.as_mv.col > x->mv_col_max) continue; \
    if (this_mv.as_mv.row < x->mv_row_min) continue; \
    if (this_mv.as_mv.row > x->mv_row_max) continue; \
  }

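/* Add the motion vector cost only if the raw SAD already beats the current
 * best, then record the site index if the total still improves.
 */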
#define CHECK_BETTER                                                     \
  {                                                                      \
    if (thissad < bestsad) {                                             \
      thissad +=                                                         \
          mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit); \
      if (thissad < bestsad) {                                           \
        bestsad = thissad;                                               \
        best_site = i;                                                   \
      }                                                                  \
    }                                                                    \
  }

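/* For each direction k of the hexagon pattern, the three points of the new
 * hexagon that have not already been evaluated once the centre moves in
 * that direction.
 */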
static const MV next_chkpts[6][3] = {
  { { -2, 0 }, { -1, -2 }, { 1, -2 } }, { { -1, -2 }, { 1, -2 }, { 2, 0 } },
  { { 1, -2 }, { 2, 0 }, { 1, 2 } },    { { 2, 0 }, { 1, 2 }, { -1, 2 } },
  { { 1, 2 }, { -1, 2 }, { -2, 0 } },   { { -1, 2 }, { -2, 0 }, { -1, -2 } }
};

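/* Hexagon-based integer-pel search: evaluate the six points of a radius-2
 * hexagon around the start point and keep moving the centre toward the best
 * candidate, re-checking only the three new points each time, then finish
 * with a small diamond refinement over the four one-pel neighbours.
 */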
int vp8_hex_search(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
                   int_mv *best_mv, int search_param, int sad_per_bit,
                   const vp8_variance_fn_ptr_t *vfp, int *mvsadcost[2],
                   int *mvcost[2], int_mv *center_mv) {
  MV hex[6] = {
    { -1, -2 }, { 1, -2 }, { 2, 0 }, { 1, 2 }, { -1, 2 }, { -2, 0 }
  };
  MV neighbors[4] = { { 0, -1 }, { -1, 0 }, { 1, 0 }, { 0, 1 } };
  int i, j;

  unsigned char *what = (*(b->base_src) + b->src);
  int what_stride = b->src_stride;
  int pre_stride = x->e_mbd.pre.y_stride;
  unsigned char *base_pre = x->e_mbd.pre.y_buffer;

  int in_what_stride = pre_stride;
  int br, bc;
  int_mv this_mv;
  unsigned int bestsad;
  unsigned int thissad;
  unsigned char *base_offset;
  unsigned char *this_offset;
  int k = -1;
  int all_in;
  int best_site = -1;
  int hex_range = 127;
  int dia_range = 8;

  int_mv fcenter_mv;
  fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
  fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;

  (void)mvcost;

  /* adjust ref_mv to make sure it is within MV range */
  vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min,
               x->mv_row_max);
  br = ref_mv->as_mv.row;
  bc = ref_mv->as_mv.col;

  /* Work out the start point for the search */
  base_offset = (unsigned char *)(base_pre + d->offset);
  this_offset = base_offset + (br * (pre_stride)) + bc;
  this_mv.as_mv.row = br;
  this_mv.as_mv.col = bc;
  bestsad = vfp->sdf(what, what_stride, this_offset, in_what_stride) +
            mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);

#if CONFIG_MULTI_RES_ENCODING
  /* Lower search range based on prediction info */
  if (search_param >= 6)
    goto cal_neighbors;
  else if (search_param >= 5)
    hex_range = 4;
  else if (search_param >= 4)
    hex_range = 6;
  else if (search_param >= 3)
    hex_range = 15;
  else if (search_param >= 2)
    hex_range = 31;
  else if (search_param >= 1)
    hex_range = 63;

  dia_range = 8;
#else
  (void)search_param;
#endif

  /* hex search */
  CHECK_BOUNDS(2)

  if (all_in) {
    for (i = 0; i < 6; ++i) {
      this_mv.as_mv.row = br + hex[i].row;
      this_mv.as_mv.col = bc + hex[i].col;
      this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) +
                    this_mv.as_mv.col;
      thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
      CHECK_BETTER
    }
  } else {
    for (i = 0; i < 6; ++i) {
      this_mv.as_mv.row = br + hex[i].row;
      this_mv.as_mv.col = bc + hex[i].col;
      CHECK_POINT
      this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) +
                    this_mv.as_mv.col;
      thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
      CHECK_BETTER
    }
  }

  if (best_site == -1) {
    goto cal_neighbors;
  } else {
    br += hex[best_site].row;
    bc += hex[best_site].col;
    k = best_site;
  }

  for (j = 1; j < hex_range; ++j) {
    best_site = -1;
    CHECK_BOUNDS(2)

    if (all_in) {
      for (i = 0; i < 3; ++i) {
        this_mv.as_mv.row = br + next_chkpts[k][i].row;
        this_mv.as_mv.col = bc + next_chkpts[k][i].col;
        this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) +
                      this_mv.as_mv.col;
        thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
        CHECK_BETTER
      }
    } else {
      for (i = 0; i < 3; ++i) {
        this_mv.as_mv.row = br + next_chkpts[k][i].row;
        this_mv.as_mv.col = bc + next_chkpts[k][i].col;
        CHECK_POINT
        this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) +
                      this_mv.as_mv.col;
        thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
        CHECK_BETTER
      }
    }

    if (best_site == -1) {
      break;
    } else {
      br += next_chkpts[k][best_site].row;
      bc += next_chkpts[k][best_site].col;
      k += 5 + best_site;
      if (k >= 12) {
        k -= 12;
      } else if (k >= 6) {
        k -= 6;
      }
    }
  }

/* check 4 1-away neighbors */
cal_neighbors:
  for (j = 0; j < dia_range; ++j) {
    best_site = -1;
    CHECK_BOUNDS(1)

    if (all_in) {
      for (i = 0; i < 4; ++i) {
        this_mv.as_mv.row = br + neighbors[i].row;
        this_mv.as_mv.col = bc + neighbors[i].col;
        this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) +
                      this_mv.as_mv.col;
        thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
        CHECK_BETTER
      }
    } else {
      for (i = 0; i < 4; ++i) {
        this_mv.as_mv.row = br + neighbors[i].row;
        this_mv.as_mv.col = bc + neighbors[i].col;
        CHECK_POINT
        this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) +
                      this_mv.as_mv.col;
        thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
        CHECK_BETTER
      }
    }

    if (best_site == -1) {
      break;
    } else {
      br += neighbors[best_site].row;
      bc += neighbors[best_site].col;
    }
  }

  best_mv->as_mv.row = br;
  best_mv->as_mv.col = bc;

  return bestsad;
}
#undef CHECK_BOUNDS
#undef CHECK_POINT
#undef CHECK_BETTER

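/* Step-size-halving diamond search over the site offsets precomputed by
 * vp8_init_dsmotion_compensation()/vp8_init3smotion_compensation().
 * search_param selects the initial step size; num00 counts the steps for
 * which the search centre stayed at the starting position.
 */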
int vp8_diamond_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
                             int_mv *best_mv, int search_param, int sad_per_bit,
                             int *num00, vp8_variance_fn_ptr_t *fn_ptr,
                             int *mvcost[2], int_mv *center_mv) {
  int i, j, step;

  unsigned char *what = (*(b->base_src) + b->src);
  int what_stride = b->src_stride;
  unsigned char *in_what;
  int pre_stride = x->e_mbd.pre.y_stride;
  unsigned char *base_pre = x->e_mbd.pre.y_buffer;
  int in_what_stride = pre_stride;
  unsigned char *best_address;

  int tot_steps;
  int_mv this_mv;

  unsigned int bestsad;
  unsigned int thissad;
  int best_site = 0;
  int last_site = 0;

  int ref_row;
  int ref_col;
  int this_row_offset;
  int this_col_offset;
  search_site *ss;

  unsigned char *check_here;

  int *mvsadcost[2];
  int_mv fcenter_mv;

  mvsadcost[0] = x->mvsadcost[0];
  mvsadcost[1] = x->mvsadcost[1];
  fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
  fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;

  vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min,
               x->mv_row_max);
  ref_row = ref_mv->as_mv.row;
  ref_col = ref_mv->as_mv.col;
  *num00 = 0;
  best_mv->as_mv.row = ref_row;
  best_mv->as_mv.col = ref_col;

  /* Work out the start point for the search */
  in_what = (unsigned char *)(base_pre + d->offset + (ref_row * pre_stride) +
                              ref_col);
  best_address = in_what;

  /* Check the starting position */
  bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride) +
            mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);

  /* search_param determines the length of the initial step and hence the
   * number of iterations: 0 = initial step (MAX_FIRST_STEP) pel,
   * 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel, etc.
   */
  ss = &x->ss[search_param * x->searches_per_step];
  tot_steps = (x->ss_count / x->searches_per_step) - search_param;

  i = 1;

  for (step = 0; step < tot_steps; ++step) {
    for (j = 0; j < x->searches_per_step; ++j) {
      /* Trap illegal vectors */
      this_row_offset = best_mv->as_mv.row + ss[i].mv.row;
      this_col_offset = best_mv->as_mv.col + ss[i].mv.col;

      if ((this_col_offset > x->mv_col_min) &&
          (this_col_offset < x->mv_col_max) &&
          (this_row_offset > x->mv_row_min) &&
          (this_row_offset < x->mv_row_max))

      {
        check_here = ss[i].offset + best_address;
        thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);

        if (thissad < bestsad) {
          this_mv.as_mv.row = this_row_offset;
          this_mv.as_mv.col = this_col_offset;
          thissad +=
              mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);

          if (thissad < bestsad) {
            bestsad = thissad;
            best_site = i;
          }
        }
      }

      i++;
    }

    if (best_site != last_site) {
      best_mv->as_mv.row += ss[best_site].mv.row;
      best_mv->as_mv.col += ss[best_site].mv.col;
      best_address += ss[best_site].offset;
      last_site = best_site;
    } else if (best_address == in_what) {
      (*num00)++;
    }
  }

  this_mv.as_mv.row = best_mv->as_mv.row << 3;
  this_mv.as_mv.col = best_mv->as_mv.col << 3;

  return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) +
         mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
}

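/* As vp8_diamond_search_sad_c, but when all candidate points of a step are
 * within the MV limits the SADs are computed four at a time via
 * fn_ptr->sdx4df.
 */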
int vp8_diamond_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
                             int_mv *best_mv, int search_param, int sad_per_bit,
                             int *num00, vp8_variance_fn_ptr_t *fn_ptr,
                             int *mvcost[2], int_mv *center_mv) {
  int i, j, step;

  unsigned char *what = (*(b->base_src) + b->src);
  int what_stride = b->src_stride;
  unsigned char *in_what;
  int pre_stride = x->e_mbd.pre.y_stride;
  unsigned char *base_pre = x->e_mbd.pre.y_buffer;
  int in_what_stride = pre_stride;
  unsigned char *best_address;

  int tot_steps;
  int_mv this_mv;

  unsigned int bestsad;
  unsigned int thissad;
  int best_site = 0;
  int last_site = 0;

  int ref_row;
  int ref_col;
  int this_row_offset;
  int this_col_offset;
  search_site *ss;

  unsigned char *check_here;

  int *mvsadcost[2];
  int_mv fcenter_mv;

  mvsadcost[0] = x->mvsadcost[0];
  mvsadcost[1] = x->mvsadcost[1];
  fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
  fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;

  vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min,
               x->mv_row_max);
  ref_row = ref_mv->as_mv.row;
  ref_col = ref_mv->as_mv.col;
  *num00 = 0;
  best_mv->as_mv.row = ref_row;
  best_mv->as_mv.col = ref_col;

  /* Work out the start point for the search */
  in_what = (unsigned char *)(base_pre + d->offset + (ref_row * pre_stride) +
                              ref_col);
  best_address = in_what;

  /* Check the starting position */
  bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride) +
            mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);

  /* search_param determines the length of the initial step and hence the
   * number of iterations: 0 = initial step (MAX_FIRST_STEP) pel,
   * 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel, etc.
   */
  ss = &x->ss[search_param * x->searches_per_step];
  tot_steps = (x->ss_count / x->searches_per_step) - search_param;

  i = 1;

  for (step = 0; step < tot_steps; ++step) {
    int all_in = 1, t;

    /* To know whether all neighbor points are within the bounds, 4 bounds
     * checks are enough instead of checking 4 bounds for each point.
     */
    all_in &= ((best_mv->as_mv.row + ss[i].mv.row) > x->mv_row_min);
    all_in &= ((best_mv->as_mv.row + ss[i + 1].mv.row) < x->mv_row_max);
    all_in &= ((best_mv->as_mv.col + ss[i + 2].mv.col) > x->mv_col_min);
    all_in &= ((best_mv->as_mv.col + ss[i + 3].mv.col) < x->mv_col_max);

    if (all_in) {
      unsigned int sad_array[4];

      for (j = 0; j < x->searches_per_step; j += 4) {
        const unsigned char *block_offset[4];

        for (t = 0; t < 4; ++t) {
          block_offset[t] = ss[i + t].offset + best_address;
        }

        fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride,
                       sad_array);

        for (t = 0; t < 4; t++, i++) {
          if (sad_array[t] < bestsad) {
            this_mv.as_mv.row = best_mv->as_mv.row + ss[i].mv.row;
            this_mv.as_mv.col = best_mv->as_mv.col + ss[i].mv.col;
            sad_array[t] +=
                mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);

            if (sad_array[t] < bestsad) {
              bestsad = sad_array[t];
              best_site = i;
            }
          }
        }
      }
    } else {
      for (j = 0; j < x->searches_per_step; ++j) {
        /* Trap illegal vectors */
        this_row_offset = best_mv->as_mv.row + ss[i].mv.row;
        this_col_offset = best_mv->as_mv.col + ss[i].mv.col;

        if ((this_col_offset > x->mv_col_min) &&
            (this_col_offset < x->mv_col_max) &&
            (this_row_offset > x->mv_row_min) &&
            (this_row_offset < x->mv_row_max)) {
          check_here = ss[i].offset + best_address;
          thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);

          if (thissad < bestsad) {
            this_mv.as_mv.row = this_row_offset;
            this_mv.as_mv.col = this_col_offset;
            thissad +=
                mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);

            if (thissad < bestsad) {
              bestsad = thissad;
              best_site = i;
            }
          }
        }
        i++;
      }
    }

    if (best_site != last_site) {
      best_mv->as_mv.row += ss[best_site].mv.row;
      best_mv->as_mv.col += ss[best_site].mv.col;
      best_address += ss[best_site].offset;
      last_site = best_site;
    } else if (best_address == in_what) {
      (*num00)++;
    }
  }

  this_mv.as_mv.row = best_mv->as_mv.row * 8;
  this_mv.as_mv.col = best_mv->as_mv.col * 8;

  return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) +
         mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
}

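/* Exhaustive SAD search over a square window of +/-distance around ref_mv,
 * clamped to the MV limits.
 */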
int vp8_full_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
                          int sad_per_bit, int distance,
                          vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
                          int_mv *center_mv) {
  unsigned char *what = (*(b->base_src) + b->src);
  int what_stride = b->src_stride;
  unsigned char *in_what;
  int pre_stride = x->e_mbd.pre.y_stride;
  unsigned char *base_pre = x->e_mbd.pre.y_buffer;
  int in_what_stride = pre_stride;
  int mv_stride = pre_stride;
  unsigned char *bestaddress;
  int_mv *best_mv = &d->bmi.mv;
  int_mv this_mv;
  unsigned int bestsad;
  unsigned int thissad;
  int r, c;

  unsigned char *check_here;

  int ref_row = ref_mv->as_mv.row;
  int ref_col = ref_mv->as_mv.col;

  int row_min = ref_row - distance;
  int row_max = ref_row + distance;
  int col_min = ref_col - distance;
  int col_max = ref_col + distance;

  int *mvsadcost[2];
  int_mv fcenter_mv;

  mvsadcost[0] = x->mvsadcost[0];
  mvsadcost[1] = x->mvsadcost[1];
  fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
  fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;

  /* Work out the mid point for the search */
  in_what = base_pre + d->offset;
  bestaddress = in_what + (ref_row * pre_stride) + ref_col;

  best_mv->as_mv.row = ref_row;
  best_mv->as_mv.col = ref_col;

  /* Baseline value at the centre */
  bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride) +
            mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);

  /* Apply further limits to prevent us from using vectors that stretch
   * beyond the UMV border
   */
  if (col_min < x->mv_col_min) col_min = x->mv_col_min;

  if (col_max > x->mv_col_max) col_max = x->mv_col_max;

  if (row_min < x->mv_row_min) row_min = x->mv_row_min;

  if (row_max > x->mv_row_max) row_max = x->mv_row_max;

  for (r = row_min; r < row_max; ++r) {
    this_mv.as_mv.row = r;
    check_here = r * mv_stride + in_what + col_min;

    for (c = col_min; c < col_max; ++c) {
      thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);

      this_mv.as_mv.col = c;
      thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);

      if (thissad < bestsad) {
        bestsad = thissad;
        best_mv->as_mv.row = r;
        best_mv->as_mv.col = c;
        bestaddress = check_here;
      }

      check_here++;
    }
  }

  this_mv.as_mv.row = best_mv->as_mv.row << 3;
  this_mv.as_mv.col = best_mv->as_mv.col << 3;

  return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad) +
         mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
}

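/* Full search variant that computes three SADs per call along each row via
 * fn_ptr->sdx3f before falling back to single SAD calls.
 */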
int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
                          int sad_per_bit, int distance,
                          vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
                          int_mv *center_mv) {
  unsigned char *what = (*(b->base_src) + b->src);
  int what_stride = b->src_stride;
  unsigned char *in_what;
  int pre_stride = x->e_mbd.pre.y_stride;
  unsigned char *base_pre = x->e_mbd.pre.y_buffer;
  int in_what_stride = pre_stride;
  int mv_stride = pre_stride;
  unsigned char *bestaddress;
  int_mv *best_mv = &d->bmi.mv;
  int_mv this_mv;
  unsigned int bestsad;
  unsigned int thissad;
  int r, c;

  unsigned char *check_here;

  int ref_row = ref_mv->as_mv.row;
  int ref_col = ref_mv->as_mv.col;

  int row_min = ref_row - distance;
  int row_max = ref_row + distance;
  int col_min = ref_col - distance;
  int col_max = ref_col + distance;

  unsigned int sad_array[3];

  int *mvsadcost[2];
  int_mv fcenter_mv;

  mvsadcost[0] = x->mvsadcost[0];
  mvsadcost[1] = x->mvsadcost[1];
  fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
  fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;

  /* Work out the mid point for the search */
  in_what = base_pre + d->offset;
  bestaddress = in_what + (ref_row * pre_stride) + ref_col;

  best_mv->as_mv.row = ref_row;
  best_mv->as_mv.col = ref_col;

  /* Baseline value at the centre */
  bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride) +
            mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);

  /* Apply further limits to prevent us from using vectors that stretch
   * beyond the UMV border
   */
  if (col_min < x->mv_col_min) col_min = x->mv_col_min;

  if (col_max > x->mv_col_max) col_max = x->mv_col_max;

  if (row_min < x->mv_row_min) row_min = x->mv_row_min;

  if (row_max > x->mv_row_max) row_max = x->mv_row_max;

  for (r = row_min; r < row_max; ++r) {
    this_mv.as_mv.row = r;
    check_here = r * mv_stride + in_what + col_min;
    c = col_min;

    while ((c + 2) < col_max) {
      int i;

      fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array);

      for (i = 0; i < 3; ++i) {
        thissad = sad_array[i];

        if (thissad < bestsad) {
          this_mv.as_mv.col = c;
          thissad +=
              mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);

          if (thissad < bestsad) {
            bestsad = thissad;
            best_mv->as_mv.row = r;
            best_mv->as_mv.col = c;
            bestaddress = check_here;
          }
        }

        check_here++;
        c++;
      }
    }

    while (c < col_max) {
      thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);

      if (thissad < bestsad) {
        this_mv.as_mv.col = c;
        thissad +=
            mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);

        if (thissad < bestsad) {
          bestsad = thissad;
          best_mv->as_mv.row = r;
          best_mv->as_mv.col = c;
          bestaddress = check_here;
        }
      }

      check_here++;
      c++;
    }
  }

  this_mv.as_mv.row = best_mv->as_mv.row << 3;
  this_mv.as_mv.col = best_mv->as_mv.col << 3;

  return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad) +
         mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
}

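/* Full search variant that consumes eight candidates at a time with
 * fn_ptr->sdx8f, then three with fn_ptr->sdx3f, before falling back to
 * single SAD calls at the end of each row.
 */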
int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
                          int sad_per_bit, int distance,
                          vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
                          int_mv *center_mv) {
  unsigned char *what = (*(b->base_src) + b->src);
  int what_stride = b->src_stride;
  int pre_stride = x->e_mbd.pre.y_stride;
  unsigned char *base_pre = x->e_mbd.pre.y_buffer;
  unsigned char *in_what;
  int in_what_stride = pre_stride;
  int mv_stride = pre_stride;
  unsigned char *bestaddress;
  int_mv *best_mv = &d->bmi.mv;
  int_mv this_mv;
  unsigned int bestsad;
  unsigned int thissad;
  int r, c;

  unsigned char *check_here;

  int ref_row = ref_mv->as_mv.row;
  int ref_col = ref_mv->as_mv.col;

  int row_min = ref_row - distance;
  int row_max = ref_row + distance;
  int col_min = ref_col - distance;
  int col_max = ref_col + distance;

  DECLARE_ALIGNED(16, unsigned int, sad_array8[8]);
  unsigned int sad_array[3];

  int *mvsadcost[2];
  int_mv fcenter_mv;

  mvsadcost[0] = x->mvsadcost[0];
  mvsadcost[1] = x->mvsadcost[1];
  fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
  fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;

  /* Work out the mid point for the search */
  in_what = base_pre + d->offset;
  bestaddress = in_what + (ref_row * pre_stride) + ref_col;

  best_mv->as_mv.row = ref_row;
  best_mv->as_mv.col = ref_col;

  /* Baseline value at the centre */
  bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride) +
            mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);

  /* Apply further limits to prevent us from using vectors that stretch
   * beyond the UMV border
   */
  if (col_min < x->mv_col_min) col_min = x->mv_col_min;

  if (col_max > x->mv_col_max) col_max = x->mv_col_max;

  if (row_min < x->mv_row_min) row_min = x->mv_row_min;

  if (row_max > x->mv_row_max) row_max = x->mv_row_max;

  for (r = row_min; r < row_max; ++r) {
    this_mv.as_mv.row = r;
    check_here = r * mv_stride + in_what + col_min;
    c = col_min;

    while ((c + 7) < col_max) {
      int i;

      fn_ptr->sdx8f(what, what_stride, check_here, in_what_stride, sad_array8);

      for (i = 0; i < 8; ++i) {
        thissad = sad_array8[i];

        if (thissad < bestsad) {
          this_mv.as_mv.col = c;
          thissad +=
              mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);

          if (thissad < bestsad) {
            bestsad = thissad;
            best_mv->as_mv.row = r;
            best_mv->as_mv.col = c;
            bestaddress = check_here;
          }
        }

        check_here++;
        c++;
      }
    }

    while ((c + 2) < col_max) {
      int i;

      fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array);

      for (i = 0; i < 3; ++i) {
        thissad = sad_array[i];

        if (thissad < bestsad) {
          this_mv.as_mv.col = c;
          thissad +=
              mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);

          if (thissad < bestsad) {
            bestsad = thissad;
            best_mv->as_mv.row = r;
            best_mv->as_mv.col = c;
            bestaddress = check_here;
          }
        }

        check_here++;
        c++;
      }
    }

    while (c < col_max) {
      thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);

      if (thissad < bestsad) {
        this_mv.as_mv.col = c;
        thissad +=
            mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);

        if (thissad < bestsad) {
          bestsad = thissad;
          best_mv->as_mv.row = r;
          best_mv->as_mv.col = c;
          bestaddress = check_here;
        }
      }

      check_here++;
      c++;
    }
  }

  this_mv.as_mv.row = best_mv->as_mv.row * 8;
  this_mv.as_mv.col = best_mv->as_mv.col * 8;

  return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad) +
         mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
}

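/* Refining search: repeatedly check the four one-pel neighbours of the
 * current best vector and move to the best of them, for at most
 * search_range iterations or until no neighbour improves.
 */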
int vp8_refining_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
                              int_mv *ref_mv, int error_per_bit,
                              int search_range, vp8_variance_fn_ptr_t *fn_ptr,
                              int *mvcost[2], int_mv *center_mv) {
  MV neighbors[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } };
  int i, j;
  short this_row_offset, this_col_offset;

  int what_stride = b->src_stride;
  int pre_stride = x->e_mbd.pre.y_stride;
  unsigned char *base_pre = x->e_mbd.pre.y_buffer;
  int in_what_stride = pre_stride;
  unsigned char *what = (*(b->base_src) + b->src);
  unsigned char *best_address =
      (unsigned char *)(base_pre + d->offset +
                        (ref_mv->as_mv.row * pre_stride) + ref_mv->as_mv.col);
  unsigned char *check_here;
  int_mv this_mv;
  unsigned int bestsad;
  unsigned int thissad;

  int *mvsadcost[2];
  int_mv fcenter_mv;

  mvsadcost[0] = x->mvsadcost[0];
  mvsadcost[1] = x->mvsadcost[1];
  fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
  fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;

  bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride) +
            mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit);

  for (i = 0; i < search_range; ++i) {
    int best_site = -1;

    for (j = 0; j < 4; ++j) {
      this_row_offset = ref_mv->as_mv.row + neighbors[j].row;
      this_col_offset = ref_mv->as_mv.col + neighbors[j].col;

      if ((this_col_offset > x->mv_col_min) &&
          (this_col_offset < x->mv_col_max) &&
          (this_row_offset > x->mv_row_min) &&
          (this_row_offset < x->mv_row_max)) {
        check_here = (neighbors[j].row) * in_what_stride + neighbors[j].col +
                     best_address;
        thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);

        if (thissad < bestsad) {
          this_mv.as_mv.row = this_row_offset;
          this_mv.as_mv.col = this_col_offset;
          thissad +=
              mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit);

          if (thissad < bestsad) {
            bestsad = thissad;
            best_site = j;
          }
        }
      }
    }

    if (best_site == -1) {
      break;
    } else {
      ref_mv->as_mv.row += neighbors[best_site].row;
      ref_mv->as_mv.col += neighbors[best_site].col;
      best_address += (neighbors[best_site].row) * in_what_stride +
                      neighbors[best_site].col;
    }
  }

  this_mv.as_mv.row = ref_mv->as_mv.row * 8;
  this_mv.as_mv.col = ref_mv->as_mv.col * 8;

  return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) +
         mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
}

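/* Refining search, SIMD-assisted variant.
 *
 * Same algorithm as vp8_refining_search_sad_c, but when the whole one-pel
 * neighbourhood lies inside the motion-vector limits the four neighbour
 * SADs are computed in a single sdx4df call; otherwise it falls back to
 * per-point sdf checks with explicit bounds tests.
 */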
int vp8_refining_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
                              int_mv *ref_mv, int error_per_bit,
                              int search_range, vp8_variance_fn_ptr_t *fn_ptr,
                              int *mvcost[2], int_mv *center_mv) {
  MV neighbors[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } };
  int i, j;
  short this_row_offset, this_col_offset;

  int what_stride = b->src_stride;
  int pre_stride = x->e_mbd.pre.y_stride;
  unsigned char *base_pre = x->e_mbd.pre.y_buffer;
  int in_what_stride = pre_stride;
  unsigned char *what = (*(b->base_src) + b->src);
  unsigned char *best_address =
      (unsigned char *)(base_pre + d->offset +
                        (ref_mv->as_mv.row * pre_stride) + ref_mv->as_mv.col);
  unsigned char *check_here;
  int_mv this_mv;
  unsigned int bestsad;
  unsigned int thissad;

  int *mvsadcost[2];
  int_mv fcenter_mv;

  mvsadcost[0] = x->mvsadcost[0];
  mvsadcost[1] = x->mvsadcost[1];
  fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
  fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;

  bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride) +
            mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit);

  for (i = 0; i < search_range; ++i) {
    int best_site = -1;
    int all_in = 1;

    all_in &= ((ref_mv->as_mv.row - 1) > x->mv_row_min);
    all_in &= ((ref_mv->as_mv.row + 1) < x->mv_row_max);
    all_in &= ((ref_mv->as_mv.col - 1) > x->mv_col_min);
    all_in &= ((ref_mv->as_mv.col + 1) < x->mv_col_max);

    if (all_in) {
      unsigned int sad_array[4];
      const unsigned char *block_offset[4];
      block_offset[0] = best_address - in_what_stride;
      block_offset[1] = best_address - 1;
      block_offset[2] = best_address + 1;
      block_offset[3] = best_address + in_what_stride;

      fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride,
                     sad_array);

      for (j = 0; j < 4; ++j) {
        if (sad_array[j] < bestsad) {
          this_mv.as_mv.row = ref_mv->as_mv.row + neighbors[j].row;
          this_mv.as_mv.col = ref_mv->as_mv.col + neighbors[j].col;
          sad_array[j] +=
              mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit);

          if (sad_array[j] < bestsad) {
            bestsad = sad_array[j];
            best_site = j;
          }
        }
      }
    } else {
      for (j = 0; j < 4; ++j) {
        this_row_offset = ref_mv->as_mv.row + neighbors[j].row;
        this_col_offset = ref_mv->as_mv.col + neighbors[j].col;

        if ((this_col_offset > x->mv_col_min) &&
            (this_col_offset < x->mv_col_max) &&
            (this_row_offset > x->mv_row_min) &&
            (this_row_offset < x->mv_row_max)) {
          check_here = (neighbors[j].row) * in_what_stride + neighbors[j].col +
                       best_address;
          thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);

          if (thissad < bestsad) {
            this_mv.as_mv.row = this_row_offset;
            this_mv.as_mv.col = this_col_offset;
            thissad +=
                mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit);

            if (thissad < bestsad) {
              bestsad = thissad;
              best_site = j;
            }
          }
        }
      }
    }

    if (best_site == -1) {
      break;
    } else {
      ref_mv->as_mv.row += neighbors[best_site].row;
      ref_mv->as_mv.col += neighbors[best_site].col;
      best_address += (neighbors[best_site].row) * in_what_stride +
                      neighbors[best_site].col;
    }
  }

  this_mv.as_mv.row = ref_mv->as_mv.row * 8;
  this_mv.as_mv.col = ref_mv->as_mv.col * 8;

  return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) +
         mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
}
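
/* Illustrative call sketch (not part of this file): an encoder-side caller
 * that already has a MACROBLOCK, BLOCK, BLOCKD, a variance function table
 * and a starting full-pel vector might refine it roughly as follows.  The
 * variable names below are placeholders, not identifiers defined here.
 *
 *   int err = vp8_refining_search_sadx4(x, b, d, &mv, sad_per_bit,
 *                                       search_range, &fn_ptr,
 *                                       mvcost, &ref_mv);
 *
 * On return, mv holds the refined full-pel vector and err the combined
 * distortion plus MV rate estimate used for mode decisions.
 */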

#ifdef VP8_ENTROPY_STATS
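/* Dump the accumulated mode-context statistics as a C table.
 *
 * Writes modecont.c containing vp8_mode_contexts[6][4]: for each of the
 * six reference-count contexts and four mode decisions, the estimated
 * branch probability (in units of 1/256, clamped to be non-zero) derived
 * from the mv_ref_ct counts gathered during encoding.
 */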
void print_mode_context(void) {
  FILE *f = fopen("modecont.c", "w");
  int i, j;

  if (!f) return;

  fprintf(f, "#include \"entropy.h\"\n");
  fprintf(f, "const int vp8_mode_contexts[6][4] =\n");
  fprintf(f, "{\n");

  for (j = 0; j < 6; ++j) {
    fprintf(f, "  { /* %d */\n", j);
    fprintf(f, "    ");

    for (i = 0; i < 4; ++i) {
      int overall_prob;
      int this_prob;
      int count;

      /* Overall probs (computed for reference; only this_prob is printed) */
      count = mv_mode_cts[i][0] + mv_mode_cts[i][1];

      if (count)
        overall_prob = 256 * mv_mode_cts[i][0] / count;
      else
        overall_prob = 128;

      if (overall_prob == 0) overall_prob = 1;

      /* context probs */
      count = mv_ref_ct[j][i][0] + mv_ref_ct[j][i][1];

      if (count)
        this_prob = 256 * mv_ref_ct[j][i][0] / count;
      else
        this_prob = 128;

      if (this_prob == 0) this_prob = 1;

      fprintf(f, "%5d, ", this_prob);
    }

    fprintf(f, "  },\n");
  }

  fprintf(f, "};\n");
  fclose(f);
}

/* MV ref count entropy stats code */
void init_mv_ref_counts(void) {
  memset(mv_ref_ct, 0, sizeof(mv_ref_ct));
  memset(mv_mode_cts, 0, sizeof(mv_mode_cts));
}

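/* Accumulate mode and mode-context counts for one macroblock.
 *
 * The mode decision is recorded as a chain of binary events
 * (ZEROMV?, else NEARESTMV?, else NEARMV?, else NEWMV?); at each node the
 * corresponding entry of ct[] selects which context bin of mv_ref_ct is
 * incremented, while mv_mode_cts tracks the context-independent totals.
 */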
void accum_mv_refs(MB_PREDICTION_MODE m, const int ct[4]) {
  if (m == ZEROMV) {
    ++mv_ref_ct[ct[0]][0][0];
    ++mv_mode_cts[0][0];
  } else {
    ++mv_ref_ct[ct[0]][0][1];
    ++mv_mode_cts[0][1];

    if (m == NEARESTMV) {
      ++mv_ref_ct[ct[1]][1][0];
      ++mv_mode_cts[1][0];
    } else {
      ++mv_ref_ct[ct[1]][1][1];
      ++mv_mode_cts[1][1];

      if (m == NEARMV) {
        ++mv_ref_ct[ct[2]][2][0];
        ++mv_mode_cts[2][0];
      } else {
        ++mv_ref_ct[ct[2]][2][1];
        ++mv_mode_cts[2][1];

        if (m == NEWMV) {
          ++mv_ref_ct[ct[3]][3][0];
          ++mv_mode_cts[3][0];
        } else {
          ++mv_ref_ct[ct[3]][3][1];
          ++mv_mode_cts[3][1];
        }
      }
    }
  }
}

#endif /* VP8_ENTROPY_STATS */