vp9_mcomp.c revision 91037db265ecdd914a26e056cf69207b4f50924e
1/*
2 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 *
4 *  Use of this source code is governed by a BSD-style license
5 *  that can be found in the LICENSE file in the root of the source
6 *  tree. An additional intellectual property rights grant can be found
7 *  in the file PATENTS.  All contributing project authors may
8 *  be found in the AUTHORS file in the root of the source tree.
9 */
10
11#include <stdio.h>
12#include <limits.h>
13#include <math.h>
14
15#include "vp9/encoder/vp9_onyx_int.h"
16#include "vp9/encoder/vp9_mcomp.h"
17#include "vpx_mem/vpx_mem.h"
18#include "./vpx_config.h"
19#include "vp9/common/vp9_findnearmv.h"
20#include "vp9/common/vp9_common.h"
21
22// #define NEW_DIAMOND_SEARCH
23
24void vp9_clamp_mv_min_max(MACROBLOCK *x, int_mv *ref_mv) {
25  int col_min = (ref_mv->as_mv.col >> 3) - MAX_FULL_PEL_VAL +
26                ((ref_mv->as_mv.col & 7) ? 1 : 0);
27  int row_min = (ref_mv->as_mv.row >> 3) - MAX_FULL_PEL_VAL +
28                ((ref_mv->as_mv.row & 7) ? 1 : 0);
29  int col_max = (ref_mv->as_mv.col >> 3) + MAX_FULL_PEL_VAL;
30  int row_max = (ref_mv->as_mv.row >> 3) + MAX_FULL_PEL_VAL;
31
32  /* Get intersection of UMV window and valid MV window to reduce # of checks in diamond search. */
33  if (x->mv_col_min < col_min)
34    x->mv_col_min = col_min;
35  if (x->mv_col_max > col_max)
36    x->mv_col_max = col_max;
37  if (x->mv_row_min < row_min)
38    x->mv_row_min = row_min;
39  if (x->mv_row_max > row_max)
40    x->mv_row_max = row_max;
41}
42
43int vp9_init_search_range(VP9_COMP *cpi, int size) {
44  int sr = 0;
45
46  // Minimum search size no matter what the passed in value.
47  size = MAX(16, size);
48
49  while ((size << sr) < MAX_FULL_PEL_VAL)
50    sr++;
51
52  if (sr)
53    sr--;
54
55  sr += cpi->sf.reduce_first_step_size;
56  sr = MIN(sr, (cpi->sf.max_step_search_steps - 2));
57  return sr;
58}
59
60int vp9_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvjcost, int *mvcost[2],
61                    int weight, int ishp) {
62  MV v;
63  v.row = mv->as_mv.row - ref->as_mv.row;
64  v.col = mv->as_mv.col - ref->as_mv.col;
65  return ROUND_POWER_OF_TWO((mvjcost[vp9_get_mv_joint(&v)] +
66                             mvcost[0][v.row] +
67                             mvcost[1][v.col]) * weight, 7);
68}
69
70static int mv_err_cost(int_mv *mv, int_mv *ref, int *mvjcost, int *mvcost[2],
71                       int error_per_bit, int ishp) {
72  if (mvcost) {
73    MV v;
74    v.row = mv->as_mv.row - ref->as_mv.row;
75    v.col = mv->as_mv.col - ref->as_mv.col;
76    return ROUND_POWER_OF_TWO((mvjcost[vp9_get_mv_joint(&v)] +
77                               mvcost[0][v.row] +
78                               mvcost[1][v.col]) * error_per_bit, 13);
79  }
80  return 0;
81}
82
83static int mvsad_err_cost(int_mv *mv, int_mv *ref, int *mvjsadcost,
84                          int *mvsadcost[2], int error_per_bit) {
85  if (mvsadcost) {
86    MV v;
87    v.row = mv->as_mv.row - ref->as_mv.row;
88    v.col = mv->as_mv.col - ref->as_mv.col;
89    return ROUND_POWER_OF_TWO((mvjsadcost[vp9_get_mv_joint(&v)] +
90                               mvsadcost[0][v.row] +
91                               mvsadcost[1][v.col]) * error_per_bit, 8);
92  }
93  return 0;
94}
95
96void vp9_init_dsmotion_compensation(MACROBLOCK *x, int stride) {
97  int len;
98  int search_site_count = 0;
99
100  // Generate offsets for 4 search sites per step.
101  x->ss[search_site_count].mv.col = 0;
102  x->ss[search_site_count].mv.row = 0;
103  x->ss[search_site_count].offset = 0;
104  search_site_count++;
105
106  for (len = MAX_FIRST_STEP; len > 0; len /= 2) {
107    // Compute offsets for search sites.
108    x->ss[search_site_count].mv.col = 0;
109    x->ss[search_site_count].mv.row = -len;
110    x->ss[search_site_count].offset = -len * stride;
111    search_site_count++;
112
113    // Compute offsets for search sites.
114    x->ss[search_site_count].mv.col = 0;
115    x->ss[search_site_count].mv.row = len;
116    x->ss[search_site_count].offset = len * stride;
117    search_site_count++;
118
119    // Compute offsets for search sites.
120    x->ss[search_site_count].mv.col = -len;
121    x->ss[search_site_count].mv.row = 0;
122    x->ss[search_site_count].offset = -len;
123    search_site_count++;
124
125    // Compute offsets for search sites.
126    x->ss[search_site_count].mv.col = len;
127    x->ss[search_site_count].mv.row = 0;
128    x->ss[search_site_count].offset = len;
129    search_site_count++;
130  }
131
132  x->ss_count = search_site_count;
133  x->searches_per_step = 4;
134}
135
136void vp9_init3smotion_compensation(MACROBLOCK *x, int stride) {
137  int len;
138  int search_site_count = 0;
139
140  // Generate offsets for 8 search sites per step.
141  x->ss[search_site_count].mv.col = 0;
142  x->ss[search_site_count].mv.row = 0;
143  x->ss[search_site_count].offset = 0;
144  search_site_count++;
145
146  for (len = MAX_FIRST_STEP; len > 0; len /= 2) {
147    // Compute offsets for search sites.
148    x->ss[search_site_count].mv.col = 0;
149    x->ss[search_site_count].mv.row = -len;
150    x->ss[search_site_count].offset = -len * stride;
151    search_site_count++;
152
153    // Compute offsets for search sites.
154    x->ss[search_site_count].mv.col = 0;
155    x->ss[search_site_count].mv.row = len;
156    x->ss[search_site_count].offset = len * stride;
157    search_site_count++;
158
159    // Compute offsets for search sites.
160    x->ss[search_site_count].mv.col = -len;
161    x->ss[search_site_count].mv.row = 0;
162    x->ss[search_site_count].offset = -len;
163    search_site_count++;
164
165    // Compute offsets for search sites.
166    x->ss[search_site_count].mv.col = len;
167    x->ss[search_site_count].mv.row = 0;
168    x->ss[search_site_count].offset = len;
169    search_site_count++;
170
171    // Compute offsets for search sites.
172    x->ss[search_site_count].mv.col = -len;
173    x->ss[search_site_count].mv.row = -len;
174    x->ss[search_site_count].offset = -len * stride - len;
175    search_site_count++;
176
177    // Compute offsets for search sites.
178    x->ss[search_site_count].mv.col = len;
179    x->ss[search_site_count].mv.row = -len;
180    x->ss[search_site_count].offset = -len * stride + len;
181    search_site_count++;
182
183    // Compute offsets for search sites.
184    x->ss[search_site_count].mv.col = -len;
185    x->ss[search_site_count].mv.row = len;
186    x->ss[search_site_count].offset = len * stride - len;
187    search_site_count++;
188
189    // Compute offsets for search sites.
190    x->ss[search_site_count].mv.col = len;
191    x->ss[search_site_count].mv.row = len;
192    x->ss[search_site_count].offset = len * stride + len;
193    search_site_count++;
194  }
195
196  x->ss_count = search_site_count;
197  x->searches_per_step = 8;
198}
199
/*
 * To avoid the penalty of reads that cross a cache line, preload the
 * reference area into a small aligned buffer, so that reads from that buffer
 * never straddle a cache line. This reduces the CPU cycles spent reading
 * reference data in the sub-pixel filter functions.
 * TODO: Since the sub-pixel search range here is currently -3 ~ 3, copy a
 * 22 rows x 32 cols area, which is enough for a 16x16 macroblock. Later, for
 * SPLITMV, the area could be reduced.
 */
209
/*
 * Helper macros for the iterative sub-pixel searches that follow.
 *
 * They are not self-contained: each one reads (and CHECK_BETTER also writes)
 * locals of the function it is expanded in, by name. The names used are
 * mvcost, mvjcost, rr, rc, error_per_bit, minc, maxc, minr, maxr, y,
 * y_stride, offset, vfp, z, src_stride, sse, thismse, besterr, br, bc,
 * distortion and sse1. Arguments may be evaluated more than once, so they
 * must be free of side effects.
 */

/* estimated cost of a motion vector (r,c); 0 when mvcost is NULL */
#define MVC(r, c)                                       \
    (mvcost ?                                           \
     ((mvjcost[((r) != rr) * 2 + ((c) != rc)] +         \
       mvcost[0][((r) - rr)] + mvcost[1][((c) - rc)]) * \
      error_per_bit + 4096) >> 13 : 0)


#define SP(x) (((x) & 7) << 1)  // convert motion vector component to offset
                                // for svf calc

/*
 * Runs statement s when (r, c) lies inside [minr, maxr] x [minc, maxc],
 * otherwise runs e (a trailing ';' is appended after e). r and c are
 * parenthesised so that any expression can be passed safely.
 */
#define IFMVCV(r, c, s, e)                                        \
    if ((c) >= minc && (c) <= maxc && (r) >= minr && (r) <= maxr) \
      s                                                           \
    else                                                          \
      e;

/* pointer to predictor base of a motionvector */
#define PRE(r, c) (y + (((r) >> 3) * y_stride + ((c) >> 3) -(offset)))

/* returns subpixel variance error function */
#define DIST(r, c) \
    vfp->svf(PRE(r, c), y_stride, SP(c), SP(r), z, src_stride, &sse)

/*
 * checks if (r, c) has better score than previous best.
 * v receives the candidate's score, or INT_MAX when (r, c) is out of range.
 * On improvement besterr, br, bc, *distortion and *sse1 are updated.
 * Wrapped in do { } while (0) so that it behaves as a single statement.
 */
#define CHECK_BETTER(v, r, c)                                            \
    do {                                                                 \
      IFMVCV(r, c, {                                                     \
        thismse = (DIST(r, c));                                          \
        if (((v) = MVC(r, c) + thismse) < besterr) {                     \
          besterr = (v);                                                 \
          br = (r);                                                      \
          bc = (c);                                                      \
          *distortion = thismse;                                         \
          *sse1 = sse;                                                   \
        }                                                                \
      },                                                                 \
      (v) = INT_MAX;)                                                    \
    } while (0)
247
248int vp9_find_best_sub_pixel_step_iteratively(MACROBLOCK *x,
249                                             int_mv *bestmv, int_mv *ref_mv,
250                                             int error_per_bit,
251                                             const vp9_variance_fn_ptr_t *vfp,
252                                             int *mvjcost, int *mvcost[2],
253                                             int *distortion,
254                                             unsigned int *sse1) {
255  uint8_t *z = x->plane[0].src.buf;
256  int src_stride = x->plane[0].src.stride;
257  MACROBLOCKD *xd = &x->e_mbd;
258
259  int rr, rc, br, bc, hstep;
260  int tr, tc;
261  unsigned int besterr = INT_MAX;
262  unsigned int left, right, up, down, diag;
263  unsigned int sse;
264  unsigned int whichdir;
265  unsigned int halfiters = 4;
266  unsigned int quarteriters = 4;
267  unsigned int eighthiters = 4;
268  int thismse;
269  int maxc, minc, maxr, minr;
270  int y_stride;
271  int offset;
272  int usehp = xd->allow_high_precision_mv;
273
274  uint8_t *y = xd->plane[0].pre[0].buf +
275               (bestmv->as_mv.row) * xd->plane[0].pre[0].stride +
276               bestmv->as_mv.col;
277
278  y_stride = xd->plane[0].pre[0].stride;
279
280  rr = ref_mv->as_mv.row;
281  rc = ref_mv->as_mv.col;
282  br = bestmv->as_mv.row << 3;
283  bc = bestmv->as_mv.col << 3;
284  hstep = 4;
285  minc = MAX(x->mv_col_min << 3, (ref_mv->as_mv.col) - ((1 << MV_MAX_BITS) - 1));
286  maxc = MIN(x->mv_col_max << 3, (ref_mv->as_mv.col) + ((1 << MV_MAX_BITS) - 1));
287  minr = MAX(x->mv_row_min << 3, (ref_mv->as_mv.row) - ((1 << MV_MAX_BITS) - 1));
288  maxr = MIN(x->mv_row_max << 3, (ref_mv->as_mv.row) + ((1 << MV_MAX_BITS) - 1));
289
290  tr = br;
291  tc = bc;
292
293
294  offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col;
295
296  // central mv
297  bestmv->as_mv.row <<= 3;
298  bestmv->as_mv.col <<= 3;
299
300  // calculate central point error
301  besterr = vfp->vf(y, y_stride, z, src_stride, sse1);
302  *distortion = besterr;
303  besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost,
304                         error_per_bit, xd->allow_high_precision_mv);
305
306  // TODO: Each subsequent iteration checks at least one point in
307  // common with the last iteration could be 2 ( if diag selected)
308  while (--halfiters) {
309    // 1/2 pel
310    CHECK_BETTER(left, tr, tc - hstep);
311    CHECK_BETTER(right, tr, tc + hstep);
312    CHECK_BETTER(up, tr - hstep, tc);
313    CHECK_BETTER(down, tr + hstep, tc);
314
315    whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
316
317    switch (whichdir) {
318      case 0:
319        CHECK_BETTER(diag, tr - hstep, tc - hstep);
320        break;
321      case 1:
322        CHECK_BETTER(diag, tr - hstep, tc + hstep);
323        break;
324      case 2:
325        CHECK_BETTER(diag, tr + hstep, tc - hstep);
326        break;
327      case 3:
328        CHECK_BETTER(diag, tr + hstep, tc + hstep);
329        break;
330    }
331
332    // no reason to check the same one again.
333    if (tr == br && tc == bc)
334      break;
335
336    tr = br;
337    tc = bc;
338  }
339
340  // TODO: Each subsequent iteration checks at least one point in common with
341  // the last iteration could be 2 ( if diag selected) 1/4 pel
342  hstep >>= 1;
343  while (--quarteriters) {
344    CHECK_BETTER(left, tr, tc - hstep);
345    CHECK_BETTER(right, tr, tc + hstep);
346    CHECK_BETTER(up, tr - hstep, tc);
347    CHECK_BETTER(down, tr + hstep, tc);
348
349    whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
350
351    switch (whichdir) {
352      case 0:
353        CHECK_BETTER(diag, tr - hstep, tc - hstep);
354        break;
355      case 1:
356        CHECK_BETTER(diag, tr - hstep, tc + hstep);
357        break;
358      case 2:
359        CHECK_BETTER(diag, tr + hstep, tc - hstep);
360        break;
361      case 3:
362        CHECK_BETTER(diag, tr + hstep, tc + hstep);
363        break;
364    }
365
366    // no reason to check the same one again.
367    if (tr == br && tc == bc)
368      break;
369
370    tr = br;
371    tc = bc;
372  }
373
374  if (xd->allow_high_precision_mv) {
375    usehp = vp9_use_mv_hp(&ref_mv->as_mv);
376  } else {
377    usehp = 0;
378  }
379
380  if (usehp) {
381    hstep >>= 1;
382    while (--eighthiters) {
383      CHECK_BETTER(left, tr, tc - hstep);
384      CHECK_BETTER(right, tr, tc + hstep);
385      CHECK_BETTER(up, tr - hstep, tc);
386      CHECK_BETTER(down, tr + hstep, tc);
387
388      whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
389
390      switch (whichdir) {
391        case 0:
392          CHECK_BETTER(diag, tr - hstep, tc - hstep);
393          break;
394        case 1:
395          CHECK_BETTER(diag, tr - hstep, tc + hstep);
396          break;
397        case 2:
398          CHECK_BETTER(diag, tr + hstep, tc - hstep);
399          break;
400        case 3:
401          CHECK_BETTER(diag, tr + hstep, tc + hstep);
402          break;
403      }
404
405      // no reason to check the same one again.
406      if (tr == br && tc == bc)
407        break;
408
409      tr = br;
410      tc = bc;
411    }
412  }
413  bestmv->as_mv.row = br;
414  bestmv->as_mv.col = bc;
415
416  if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > (MAX_FULL_PEL_VAL << 3)) ||
417      (abs(bestmv->as_mv.row - ref_mv->as_mv.row) > (MAX_FULL_PEL_VAL << 3)))
418    return INT_MAX;
419
420  return besterr;
421}
422
/*
 * Redefine DIST for the compound search below: candidates are scored with
 * vfp->svaf(), which takes second_pred as an extra argument.
 */
#undef DIST
#define DIST(r, c)                                                    \
    vfp->svaf(PRE(r, c), y_stride, SP(c), SP(r), z, src_stride, &sse, \
              second_pred)
428
429int vp9_find_best_sub_pixel_comp(MACROBLOCK *x,
430                                 int_mv *bestmv, int_mv *ref_mv,
431                                 int error_per_bit,
432                                 const vp9_variance_fn_ptr_t *vfp,
433                                 int *mvjcost, int *mvcost[2],
434                                 int *distortion,
435                                 unsigned int *sse1,
436                                 const uint8_t *second_pred, int w, int h) {
437  uint8_t *z = x->plane[0].src.buf;
438  int src_stride = x->plane[0].src.stride;
439  MACROBLOCKD *xd = &x->e_mbd;
440
441  int rr, rc, br, bc, hstep;
442  int tr, tc;
443  unsigned int besterr = INT_MAX;
444  unsigned int left, right, up, down, diag;
445  unsigned int sse;
446  unsigned int whichdir;
447  unsigned int halfiters = 4;
448  unsigned int quarteriters = 4;
449  unsigned int eighthiters = 4;
450  int thismse;
451  int maxc, minc, maxr, minr;
452  int y_stride;
453  int offset;
454  int usehp = xd->allow_high_precision_mv;
455
456  DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64);
457  uint8_t *y = xd->plane[0].pre[0].buf +
458               (bestmv->as_mv.row) * xd->plane[0].pre[0].stride +
459               bestmv->as_mv.col;
460
461  y_stride = xd->plane[0].pre[0].stride;
462
463  rr = ref_mv->as_mv.row;
464  rc = ref_mv->as_mv.col;
465  br = bestmv->as_mv.row << 3;
466  bc = bestmv->as_mv.col << 3;
467  hstep = 4;
468  minc = MAX(x->mv_col_min << 3, (ref_mv->as_mv.col) -
469             ((1 << MV_MAX_BITS) - 1));
470  maxc = MIN(x->mv_col_max << 3, (ref_mv->as_mv.col) +
471             ((1 << MV_MAX_BITS) - 1));
472  minr = MAX(x->mv_row_min << 3, (ref_mv->as_mv.row) -
473             ((1 << MV_MAX_BITS) - 1));
474  maxr = MIN(x->mv_row_max << 3, (ref_mv->as_mv.row) +
475             ((1 << MV_MAX_BITS) - 1));
476
477  tr = br;
478  tc = bc;
479
480
481  offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col;
482
483  // central mv
484  bestmv->as_mv.row <<= 3;
485  bestmv->as_mv.col <<= 3;
486
487  // calculate central point error
488  // TODO(yunqingwang): central pointer error was already calculated in full-
489  // pixel search, and can be passed in this function.
490  comp_avg_pred(comp_pred, second_pred, w, h, y, y_stride);
491  besterr = vfp->vf(comp_pred, w, z, src_stride, sse1);
492  *distortion = besterr;
493  besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost,
494                         error_per_bit, xd->allow_high_precision_mv);
495
496  // Each subsequent iteration checks at least one point in
497  // common with the last iteration could be 2 ( if diag selected)
498  while (--halfiters) {
499    // 1/2 pel
500    CHECK_BETTER(left, tr, tc - hstep);
501    CHECK_BETTER(right, tr, tc + hstep);
502    CHECK_BETTER(up, tr - hstep, tc);
503    CHECK_BETTER(down, tr + hstep, tc);
504
505    whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
506
507    switch (whichdir) {
508      case 0:
509        CHECK_BETTER(diag, tr - hstep, tc - hstep);
510        break;
511      case 1:
512        CHECK_BETTER(diag, tr - hstep, tc + hstep);
513        break;
514      case 2:
515        CHECK_BETTER(diag, tr + hstep, tc - hstep);
516        break;
517      case 3:
518        CHECK_BETTER(diag, tr + hstep, tc + hstep);
519        break;
520    }
521
522    // no reason to check the same one again.
523    if (tr == br && tc == bc)
524      break;
525
526    tr = br;
527    tc = bc;
528  }
529
530  // Each subsequent iteration checks at least one point in common with
531  // the last iteration could be 2 ( if diag selected) 1/4 pel
532  hstep >>= 1;
533  while (--quarteriters) {
534    CHECK_BETTER(left, tr, tc - hstep);
535    CHECK_BETTER(right, tr, tc + hstep);
536    CHECK_BETTER(up, tr - hstep, tc);
537    CHECK_BETTER(down, tr + hstep, tc);
538
539    whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
540
541    switch (whichdir) {
542      case 0:
543        CHECK_BETTER(diag, tr - hstep, tc - hstep);
544        break;
545      case 1:
546        CHECK_BETTER(diag, tr - hstep, tc + hstep);
547        break;
548      case 2:
549        CHECK_BETTER(diag, tr + hstep, tc - hstep);
550        break;
551      case 3:
552        CHECK_BETTER(diag, tr + hstep, tc + hstep);
553        break;
554    }
555
556    // no reason to check the same one again.
557    if (tr == br && tc == bc)
558      break;
559
560    tr = br;
561    tc = bc;
562  }
563
564  if (xd->allow_high_precision_mv) {
565    usehp = vp9_use_mv_hp(&ref_mv->as_mv);
566  } else {
567    usehp = 0;
568  }
569
570  if (usehp) {
571    hstep >>= 1;
572    while (--eighthiters) {
573      CHECK_BETTER(left, tr, tc - hstep);
574      CHECK_BETTER(right, tr, tc + hstep);
575      CHECK_BETTER(up, tr - hstep, tc);
576      CHECK_BETTER(down, tr + hstep, tc);
577
578      whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
579
580      switch (whichdir) {
581        case 0:
582          CHECK_BETTER(diag, tr - hstep, tc - hstep);
583          break;
584        case 1:
585          CHECK_BETTER(diag, tr - hstep, tc + hstep);
586          break;
587        case 2:
588          CHECK_BETTER(diag, tr + hstep, tc - hstep);
589          break;
590        case 3:
591          CHECK_BETTER(diag, tr + hstep, tc + hstep);
592          break;
593      }
594
595      // no reason to check the same one again.
596      if (tr == br && tc == bc)
597        break;
598
599      tr = br;
600      tc = bc;
601    }
602  }
603  bestmv->as_mv.row = br;
604  bestmv->as_mv.col = bc;
605
606  if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > (MAX_FULL_PEL_VAL << 3)) ||
607      (abs(bestmv->as_mv.row - ref_mv->as_mv.row) > (MAX_FULL_PEL_VAL << 3)))
608    return INT_MAX;
609
610  return besterr;
611}
612
613
/*
 * Retire the helper macros of the iterative sub-pixel searches. SP is left
 * defined on purpose: the code that follows still uses it. MIN and MAX are
 * removed here as well, so they are unavailable past this point unless they
 * are defined again.
 */
#undef CHECK_BETTER
#undef IFMVCV
#undef DIST
#undef PRE
#undef MVC
#undef MAX
#undef MIN
621
622int vp9_find_best_sub_pixel_step(MACROBLOCK *x,
623                                 int_mv *bestmv, int_mv *ref_mv,
624                                 int error_per_bit,
625                                 const vp9_variance_fn_ptr_t *vfp,
626                                 int *mvjcost, int *mvcost[2], int *distortion,
627                                 unsigned int *sse1) {
628  int bestmse = INT_MAX;
629  int_mv startmv;
630  int_mv this_mv;
631  int_mv orig_mv;
632  int yrow_movedback = 0, ycol_movedback = 0;
633  uint8_t *z = x->plane[0].src.buf;
634  int src_stride = x->plane[0].src.stride;
635  int left, right, up, down, diag;
636  unsigned int sse;
637  int whichdir;
638  int thismse;
639  int y_stride;
640  MACROBLOCKD *xd = &x->e_mbd;
641  int usehp = xd->allow_high_precision_mv;
642
643  uint8_t *y = xd->plane[0].pre[0].buf +
644               (bestmv->as_mv.row) * xd->plane[0].pre[0].stride +
645               bestmv->as_mv.col;
646  y_stride = xd->plane[0].pre[0].stride;
647
648  // central mv
649  bestmv->as_mv.row <<= 3;
650  bestmv->as_mv.col <<= 3;
651  startmv = *bestmv;
652  orig_mv = *bestmv;
653
654  // calculate central point error
655  bestmse = vfp->vf(y, y_stride, z, src_stride, sse1);
656  *distortion = bestmse;
657  bestmse += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit,
658                         xd->allow_high_precision_mv);
659
660  // go left then right and check error
661  this_mv.as_mv.row = startmv.as_mv.row;
662  this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4);
663  thismse = vfp->svf_halfpix_h(y - 1, y_stride, z, src_stride, &sse);
664  left = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit,
665                               xd->allow_high_precision_mv);
666
667  if (left < bestmse) {
668    *bestmv = this_mv;
669    bestmse = left;
670    *distortion = thismse;
671    *sse1 = sse;
672  }
673
674  this_mv.as_mv.col += 8;
675  thismse = vfp->svf_halfpix_h(y, y_stride, z, src_stride, &sse);
676  right = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,
677                                error_per_bit, xd->allow_high_precision_mv);
678
679  if (right < bestmse) {
680    *bestmv = this_mv;
681    bestmse = right;
682    *distortion = thismse;
683    *sse1 = sse;
684  }
685
686  // go up then down and check error
687  this_mv.as_mv.col = startmv.as_mv.col;
688  this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4);
689  thismse =  vfp->svf_halfpix_v(y - y_stride, y_stride, z, src_stride, &sse);
690  up = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit,
691                             xd->allow_high_precision_mv);
692
693  if (up < bestmse) {
694    *bestmv = this_mv;
695    bestmse = up;
696    *distortion = thismse;
697    *sse1 = sse;
698  }
699
700  this_mv.as_mv.row += 8;
701  thismse = vfp->svf_halfpix_v(y, y_stride, z, src_stride, &sse);
702  down = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit,
703                               xd->allow_high_precision_mv);
704
705  if (down < bestmse) {
706    *bestmv = this_mv;
707    bestmse = down;
708    *distortion = thismse;
709    *sse1 = sse;
710  }
711
712
713  // now check 1 more diagonal
714  whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
715  // for(whichdir =0;whichdir<4;whichdir++)
716  // {
717  this_mv = startmv;
718
719  switch (whichdir) {
720    case 0:
721      this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
722      this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
723      thismse = vfp->svf_halfpix_hv(y - 1 - y_stride, y_stride, z, src_stride,
724                                    &sse);
725      break;
726    case 1:
727      this_mv.as_mv.col += 4;
728      this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
729      thismse = vfp->svf_halfpix_hv(y - y_stride, y_stride, z, src_stride,
730                                    &sse);
731      break;
732    case 2:
733      this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
734      this_mv.as_mv.row += 4;
735      thismse = vfp->svf_halfpix_hv(y - 1, y_stride, z, src_stride, &sse);
736      break;
737    case 3:
738    default:
739      this_mv.as_mv.col += 4;
740      this_mv.as_mv.row += 4;
741      thismse = vfp->svf_halfpix_hv(y, y_stride, z, src_stride, &sse);
742      break;
743  }
744
745  diag = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit,
746                               xd->allow_high_precision_mv);
747
748  if (diag < bestmse) {
749    *bestmv = this_mv;
750    bestmse = diag;
751    *distortion = thismse;
752    *sse1 = sse;
753  }
754
755//  }
756
757
758  // time to check quarter pels.
759  if (bestmv->as_mv.row < startmv.as_mv.row) {
760    y -= y_stride;
761    yrow_movedback = 1;
762  }
763
764  if (bestmv->as_mv.col < startmv.as_mv.col) {
765    y--;
766    ycol_movedback = 1;
767  }
768
769  startmv = *bestmv;
770
771
772
773  // go left then right and check error
774  this_mv.as_mv.row = startmv.as_mv.row;
775
776  if (startmv.as_mv.col & 7) {
777    this_mv.as_mv.col = startmv.as_mv.col - 2;
778    thismse = vfp->svf(y, y_stride,
779                       SP(this_mv.as_mv.col), SP(this_mv.as_mv.row),
780                       z, src_stride, &sse);
781  } else {
782    this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
783    thismse = vfp->svf(y - 1, y_stride, SP(6), SP(this_mv.as_mv.row), z,
784                       src_stride, &sse);
785  }
786
787  left = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit,
788                               xd->allow_high_precision_mv);
789
790  if (left < bestmse) {
791    *bestmv = this_mv;
792    bestmse = left;
793    *distortion = thismse;
794    *sse1 = sse;
795  }
796
797  this_mv.as_mv.col += 4;
798  thismse = vfp->svf(y, y_stride,
799                     SP(this_mv.as_mv.col), SP(this_mv.as_mv.row),
800                     z, src_stride, &sse);
801  right = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,
802                                error_per_bit, xd->allow_high_precision_mv);
803
804  if (right < bestmse) {
805    *bestmv = this_mv;
806    bestmse = right;
807    *distortion = thismse;
808    *sse1 = sse;
809  }
810
811  // go up then down and check error
812  this_mv.as_mv.col = startmv.as_mv.col;
813
814  if (startmv.as_mv.row & 7) {
815    this_mv.as_mv.row = startmv.as_mv.row - 2;
816    thismse = vfp->svf(y, y_stride,
817                       SP(this_mv.as_mv.col), SP(this_mv.as_mv.row),
818                       z, src_stride, &sse);
819  } else {
820    this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
821    thismse = vfp->svf(y - y_stride, y_stride, SP(this_mv.as_mv.col), SP(6),
822                       z, src_stride, &sse);
823  }
824
825  up = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit,
826                             xd->allow_high_precision_mv);
827
828  if (up < bestmse) {
829    *bestmv = this_mv;
830    bestmse = up;
831    *distortion = thismse;
832    *sse1 = sse;
833  }
834
835  this_mv.as_mv.row += 4;
836  thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row),
837                     z, src_stride, &sse);
838  down = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit,
839                               xd->allow_high_precision_mv);
840
841  if (down < bestmse) {
842    *bestmv = this_mv;
843    bestmse = down;
844    *distortion = thismse;
845    *sse1 = sse;
846  }
847
848
849  // now check 1 more diagonal
850  whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
851
852//  for(whichdir=0;whichdir<4;whichdir++)
853//  {
854  this_mv = startmv;
855
856  switch (whichdir) {
857    case 0:
858
859      if (startmv.as_mv.row & 7) {
860        this_mv.as_mv.row -= 2;
861
862        if (startmv.as_mv.col & 7) {
863          this_mv.as_mv.col -= 2;
864          thismse = vfp->svf(y, y_stride,
865                             SP(this_mv.as_mv.col), SP(this_mv.as_mv.row),
866                             z, src_stride, &sse);
867        } else {
868          this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
869          thismse = vfp->svf(y - 1, y_stride,
870                             SP(6), SP(this_mv.as_mv.row), z, src_stride, &sse);
871        }
872      } else {
873        this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
874
875        if (startmv.as_mv.col & 7) {
876          this_mv.as_mv.col -= 2;
877          thismse = vfp->svf(y - y_stride, y_stride,
878                             SP(this_mv.as_mv.col), SP(6), z, src_stride, &sse);
879        } else {
880          this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
881          thismse = vfp->svf(y - y_stride - 1, y_stride,
882                             SP(6), SP(6), z, src_stride, &sse);
883        }
884      }
885
886      break;
887    case 1:
888      this_mv.as_mv.col += 2;
889
890      if (startmv.as_mv.row & 7) {
891        this_mv.as_mv.row -= 2;
892        thismse = vfp->svf(y, y_stride,
893                           SP(this_mv.as_mv.col), SP(this_mv.as_mv.row),
894                           z, src_stride, &sse);
895      } else {
896        this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
897        thismse = vfp->svf(y - y_stride, y_stride,
898                           SP(this_mv.as_mv.col), SP(6), z, src_stride, &sse);
899      }
900
901      break;
902    case 2:
903      this_mv.as_mv.row += 2;
904
905      if (startmv.as_mv.col & 7) {
906        this_mv.as_mv.col -= 2;
907        thismse = vfp->svf(y, y_stride,
908                           SP(this_mv.as_mv.col), SP(this_mv.as_mv.row),
909                           z, src_stride, &sse);
910      } else {
911        this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
912        thismse = vfp->svf(y - 1, y_stride, SP(6), SP(this_mv.as_mv.row), z,
913                           src_stride, &sse);
914      }
915
916      break;
917    case 3:
918      this_mv.as_mv.col += 2;
919      this_mv.as_mv.row += 2;
920      thismse = vfp->svf(y, y_stride,
921                         SP(this_mv.as_mv.col), SP(this_mv.as_mv.row),
922                         z, src_stride, &sse);
923      break;
924  }
925
926  diag = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit,
927                               xd->allow_high_precision_mv);
928
929  if (diag < bestmse) {
930    *bestmv = this_mv;
931    bestmse = diag;
932    *distortion = thismse;
933    *sse1 = sse;
934  }
935
936  if (x->e_mbd.allow_high_precision_mv) {
937    usehp = vp9_use_mv_hp(&ref_mv->as_mv);
938  } else {
939    usehp = 0;
940  }
941  if (!usehp)
942    return bestmse;
943
944  /* Now do 1/8th pixel */
945  if (bestmv->as_mv.row < orig_mv.as_mv.row && !yrow_movedback) {
946    y -= y_stride;
947    yrow_movedback = 1;
948  }
949
950  if (bestmv->as_mv.col < orig_mv.as_mv.col && !ycol_movedback) {
951    y--;
952    ycol_movedback = 1;
953  }
954
955  startmv = *bestmv;
956
957  // go left then right and check error
958  this_mv.as_mv.row = startmv.as_mv.row;
959
960  if (startmv.as_mv.col & 7) {
961    this_mv.as_mv.col = startmv.as_mv.col - 1;
962    thismse = vfp->svf(y, y_stride,
963                       SP(this_mv.as_mv.col), SP(this_mv.as_mv.row),
964                       z, src_stride, &sse);
965  } else {
966    this_mv.as_mv.col = (startmv.as_mv.col - 8) | 7;
967    thismse = vfp->svf(y - 1, y_stride, SP(7), SP(this_mv.as_mv.row),
968                       z, src_stride, &sse);
969  }
970
971  left = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit,
972                               xd->allow_high_precision_mv);
973
974  if (left < bestmse) {
975    *bestmv = this_mv;
976    bestmse = left;
977    *distortion = thismse;
978    *sse1 = sse;
979  }
980
981  this_mv.as_mv.col += 2;
982  thismse = vfp->svf(y, y_stride, SP(this_mv.as_mv.col), SP(this_mv.as_mv.row),
983                     z, src_stride, &sse);
984  right = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,
985                                error_per_bit, xd->allow_high_precision_mv);
986
987  if (right < bestmse) {
988    *bestmv = this_mv;
989    bestmse = right;
990    *distortion = thismse;
991    *sse1 = sse;
992  }
993
994  // go up then down and check error
995  this_mv.as_mv.col = startmv.as_mv.col;
996
997  if (startmv.as_mv.row & 7) {
998    this_mv.as_mv.row = startmv.as_mv.row - 1;
999    thismse = vfp->svf(y, y_stride,
1000                       SP(this_mv.as_mv.col), SP(this_mv.as_mv.row),
1001                       z, src_stride, &sse);
1002  } else {
1003    this_mv.as_mv.row = (startmv.as_mv.row - 8) | 7;
1004    thismse = vfp->svf(y - y_stride, y_stride,
1005                       SP(this_mv.as_mv.col), SP(7), z, src_stride, &sse);
1006  }
1007
1008  up = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit,
1009                             xd->allow_high_precision_mv);
1010
1011  if (up < bestmse) {
1012    *bestmv = this_mv;
1013    bestmse = up;
1014    *distortion = thismse;
1015    *sse1 = sse;
1016  }
1017
1018  this_mv.as_mv.row += 2;
1019  thismse = vfp->svf(y, y_stride,
1020                     SP(this_mv.as_mv.col), SP(this_mv.as_mv.row),
1021                     z, src_stride, &sse);
1022  down = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit,
1023                               xd->allow_high_precision_mv);
1024
1025  if (down < bestmse) {
1026    *bestmv = this_mv;
1027    bestmse = down;
1028    *distortion = thismse;
1029    *sse1 = sse;
1030  }
1031
1032  // now check 1 more diagonal
1033  whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
1034
1035//  for(whichdir=0;whichdir<4;whichdir++)
1036//  {
1037  this_mv = startmv;
1038
1039  switch (whichdir) {
1040    case 0:
1041
1042      if (startmv.as_mv.row & 7) {
1043        this_mv.as_mv.row -= 1;
1044
1045        if (startmv.as_mv.col & 7) {
1046          this_mv.as_mv.col -= 1;
1047          thismse = vfp->svf(y, y_stride,
1048                             SP(this_mv.as_mv.col), SP(this_mv.as_mv.row),
1049                             z, src_stride, &sse);
1050        } else {
1051          this_mv.as_mv.col = (startmv.as_mv.col - 8) | 7;
1052          thismse = vfp->svf(y - 1, y_stride,
1053                             SP(7), SP(this_mv.as_mv.row),
1054                             z, src_stride, &sse);
1055        }
1056      } else {
1057        this_mv.as_mv.row = (startmv.as_mv.row - 8) | 7;
1058
1059        if (startmv.as_mv.col & 7) {
1060          this_mv.as_mv.col -= 1;
1061          thismse = vfp->svf(y - y_stride, y_stride,
1062                             SP(this_mv.as_mv.col), SP(7), z, src_stride, &sse);
1063        } else {
1064          this_mv.as_mv.col = (startmv.as_mv.col - 8) | 7;
1065          thismse = vfp->svf(y - y_stride - 1, y_stride,
1066                             SP(7), SP(7), z, src_stride, &sse);
1067        }
1068      }
1069
1070      break;
1071    case 1:
1072      this_mv.as_mv.col += 1;
1073
1074      if (startmv.as_mv.row & 7) {
1075        this_mv.as_mv.row -= 1;
1076        thismse = vfp->svf(y, y_stride,
1077                           SP(this_mv.as_mv.col), SP(this_mv.as_mv.row),
1078                           z, src_stride, &sse);
1079      } else {
1080        this_mv.as_mv.row = (startmv.as_mv.row - 8) | 7;
1081        thismse = vfp->svf(y - y_stride, y_stride,
1082                           SP(this_mv.as_mv.col), SP(7), z, src_stride, &sse);
1083      }
1084
1085      break;
1086    case 2:
1087      this_mv.as_mv.row += 1;
1088
1089      if (startmv.as_mv.col & 7) {
1090        this_mv.as_mv.col -= 1;
1091        thismse = vfp->svf(y, y_stride,
1092                           SP(this_mv.as_mv.col), SP(this_mv.as_mv.row),
1093                           z, src_stride, &sse);
1094      } else {
1095        this_mv.as_mv.col = (startmv.as_mv.col - 8) | 7;
1096        thismse = vfp->svf(y - 1, y_stride,
1097                           SP(7), SP(this_mv.as_mv.row), z, src_stride, &sse);
1098      }
1099
1100      break;
1101    case 3:
1102      this_mv.as_mv.col += 1;
1103      this_mv.as_mv.row += 1;
1104      thismse = vfp->svf(y, y_stride,
1105                         SP(this_mv.as_mv.col), SP(this_mv.as_mv.row),
1106                         z, src_stride, &sse);
1107      break;
1108  }
1109
1110  diag = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit,
1111                               xd->allow_high_precision_mv);
1112
1113  if (diag < bestmse) {
1114    *bestmv = this_mv;
1115    bestmse = diag;
1116    *distortion = thismse;
1117    *sse1 = sse;
1118  }
1119
1120  return bestmse;
1121}
1122
1123#undef SP
1124
1125int vp9_find_best_half_pixel_step(MACROBLOCK *x,
1126                                  int_mv *bestmv, int_mv *ref_mv,
1127                                  int error_per_bit,
1128                                  const vp9_variance_fn_ptr_t *vfp,
1129                                  int *mvjcost, int *mvcost[2],
1130                                  int *distortion,
1131                                  unsigned int *sse1) {
1132  int bestmse = INT_MAX;
1133  int_mv startmv;
1134  int_mv this_mv;
1135  uint8_t *z = x->plane[0].src.buf;
1136  int src_stride = x->plane[0].src.stride;
1137  int left, right, up, down, diag;
1138  unsigned int sse;
1139  int whichdir;
1140  int thismse;
1141  int y_stride;
1142  MACROBLOCKD *xd = &x->e_mbd;
1143
1144  uint8_t *y = xd->plane[0].pre[0].buf +
1145      (bestmv->as_mv.row) * xd->plane[0].pre[0].stride + bestmv->as_mv.col;
1146  y_stride = xd->plane[0].pre[0].stride;
1147
1148  // central mv
1149  bestmv->as_mv.row <<= 3;
1150  bestmv->as_mv.col <<= 3;
1151  startmv = *bestmv;
1152
1153  // calculate central point error
1154  bestmse = vfp->vf(y, y_stride, z, src_stride, sse1);
1155  *distortion = bestmse;
1156  bestmse += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit,
1157                         xd->allow_high_precision_mv);
1158
1159  // go left then right and check error
1160  this_mv.as_mv.row = startmv.as_mv.row;
1161  this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4);
1162  thismse = vfp->svf_halfpix_h(y - 1, y_stride, z, src_stride, &sse);
1163  left = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit,
1164                               xd->allow_high_precision_mv);
1165
1166  if (left < bestmse) {
1167    *bestmv = this_mv;
1168    bestmse = left;
1169    *distortion = thismse;
1170    *sse1 = sse;
1171  }
1172
1173  this_mv.as_mv.col += 8;
1174  thismse = vfp->svf_halfpix_h(y, y_stride, z, src_stride, &sse);
1175  right = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,
1176                                error_per_bit, xd->allow_high_precision_mv);
1177
1178  if (right < bestmse) {
1179    *bestmv = this_mv;
1180    bestmse = right;
1181    *distortion = thismse;
1182    *sse1 = sse;
1183  }
1184
1185  // go up then down and check error
1186  this_mv.as_mv.col = startmv.as_mv.col;
1187  this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4);
1188  thismse = vfp->svf_halfpix_v(y - y_stride, y_stride, z, src_stride, &sse);
1189  up = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit,
1190                             xd->allow_high_precision_mv);
1191
1192  if (up < bestmse) {
1193    *bestmv = this_mv;
1194    bestmse = up;
1195    *distortion = thismse;
1196    *sse1 = sse;
1197  }
1198
1199  this_mv.as_mv.row += 8;
1200  thismse = vfp->svf_halfpix_v(y, y_stride, z, src_stride, &sse);
1201  down = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit,
1202                               xd->allow_high_precision_mv);
1203
1204  if (down < bestmse) {
1205    *bestmv = this_mv;
1206    bestmse = down;
1207    *distortion = thismse;
1208    *sse1 = sse;
1209  }
1210
1211  // now check 1 more diagonal -
1212  whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
1213  this_mv = startmv;
1214
1215  switch (whichdir) {
1216    case 0:
1217      this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
1218      this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
1219      thismse = vfp->svf_halfpix_hv(y - 1 - y_stride, y_stride,
1220                                    z, src_stride, &sse);
1221      break;
1222    case 1:
1223      this_mv.as_mv.col += 4;
1224      this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
1225      thismse = vfp->svf_halfpix_hv(y - y_stride, y_stride,
1226                                    z, src_stride, &sse);
1227      break;
1228    case 2:
1229      this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
1230      this_mv.as_mv.row += 4;
1231      thismse = vfp->svf_halfpix_hv(y - 1, y_stride, z, src_stride, &sse);
1232      break;
1233    case 3:
1234    default:
1235      this_mv.as_mv.col += 4;
1236      this_mv.as_mv.row += 4;
1237      thismse = vfp->svf_halfpix_hv(y, y_stride, z, src_stride, &sse);
1238      break;
1239  }
1240
1241  diag = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit,
1242                               xd->allow_high_precision_mv);
1243
1244  if (diag < bestmse) {
1245    *bestmv = this_mv;
1246    bestmse = diag;
1247    *distortion = thismse;
1248    *sse1 = sse;
1249  }
1250
1251  return bestmse;
1252}
1253
/*
 * Helper macros for vp9_hex_search(); they are #undef'd right after it.
 *
 * They expand to plain brace blocks rather than do { } while (0) on purpose:
 * the call sites do not add a trailing ';', and CHECK_POINT's `continue` has
 * to reach the caller's enclosing loop.
 *
 * Names expected in the expanding scope:
 *   CHECK_BOUNDS: x, br, bc, all_in
 *   CHECK_POINT:  x, this_mv (must be used inside a loop)
 *   CHECK_BETTER: this_mv, thissad, bestsad, best_site, i, fcenter_mv,
 *                 mvjsadcost, mvsadcost, sad_per_bit
 */

/* Sets all_in to 1 when (br, bc) +/- range lies inside the MV window of x
 * (bounds inclusive), and to 0 otherwise. */
#define CHECK_BOUNDS(range) \
  {\
    all_in = 1;\
    all_in &= ((br - (range)) >= x->mv_row_min);\
    all_in &= ((br + (range)) <= x->mv_row_max);\
    all_in &= ((bc - (range)) >= x->mv_col_min);\
    all_in &= ((bc + (range)) <= x->mv_col_max);\
  }

/* Skips the current loop iteration when this_mv is outside the MV window. */
#define CHECK_POINT \
  {\
    if (this_mv.as_mv.col < x->mv_col_min) continue;\
    if (this_mv.as_mv.col > x->mv_col_max) continue;\
    if (this_mv.as_mv.row < x->mv_row_min) continue;\
    if (this_mv.as_mv.row > x->mv_row_max) continue;\
  }

/* If the raw SAD already beats bestsad, adds the vector cost and, when the
 * total still beats bestsad, records it together with the site index i. */
#define CHECK_BETTER \
  {\
    if (thissad < bestsad)\
    {\
      thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvjsadcost, mvsadcost, \
                                sad_per_bit);\
      if (thissad < bestsad)\
      {\
        bestsad = thissad;\
        best_site = i;\
      }\
    }\
  }
1284
1285static const MV next_chkpts[6][3] = {
1286  {{ -2, 0}, { -1, -2}, {1, -2}},
1287  {{ -1, -2}, {1, -2}, {2, 0}},
1288  {{1, -2}, {2, 0}, {1, 2}},
1289  {{2, 0}, {1, 2}, { -1, 2}},
1290  {{1, 2}, { -1, 2}, { -2, 0}},
1291  {{ -1, 2}, { -2, 0}, { -1, -2}}
1292};
1293
1294int vp9_hex_search
1295(
1296  MACROBLOCK *x,
1297  int_mv *ref_mv,
1298  int_mv *best_mv,
1299  int search_param,
1300  int sad_per_bit,
1301  const vp9_variance_fn_ptr_t *vfp,
1302  int *mvjsadcost, int *mvsadcost[2],
1303  int *mvjcost, int *mvcost[2],
1304  int_mv *center_mv
1305) {
1306  const MACROBLOCKD* const xd = &x->e_mbd;
1307  MV hex[6] = { { -1, -2}, {1, -2}, {2, 0}, {1, 2}, { -1, 2}, { -2, 0} };
1308  MV neighbors[4] = {{0, -1}, { -1, 0}, {1, 0}, {0, 1}};
1309  int i, j;
1310
1311  uint8_t *what = x->plane[0].src.buf;
1312  int what_stride = x->plane[0].src.stride;
1313  int in_what_stride = xd->plane[0].pre[0].stride;
1314  int br, bc;
1315  int_mv this_mv;
1316  unsigned int bestsad = 0x7fffffff;
1317  unsigned int thissad;
1318  uint8_t *base_offset;
1319  uint8_t *this_offset;
1320  int k = -1;
1321  int all_in;
1322  int best_site = -1;
1323
1324  int_mv fcenter_mv;
1325  fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
1326  fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
1327
1328  // adjust ref_mv to make sure it is within MV range
1329  clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
1330  br = ref_mv->as_mv.row;
1331  bc = ref_mv->as_mv.col;
1332
1333  // Work out the start point for the search
1334  base_offset = (uint8_t *)(xd->plane[0].pre[0].buf);
1335  this_offset = base_offset + (br * (xd->plane[0].pre[0].stride)) + bc;
1336  this_mv.as_mv.row = br;
1337  this_mv.as_mv.col = bc;
1338  bestsad = vfp->sdf(what, what_stride, this_offset,
1339                     in_what_stride, 0x7fffffff)
1340            + mvsad_err_cost(&this_mv, &fcenter_mv, mvjsadcost, mvsadcost,
1341                             sad_per_bit);
1342
1343  // hex search
1344  // j=0
1345  CHECK_BOUNDS(2)
1346
1347  if (all_in) {
1348    for (i = 0; i < 6; i++) {
1349      this_mv.as_mv.row = br + hex[i].row;
1350      this_mv.as_mv.col = bc + hex[i].col;
1351      this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) + this_mv.as_mv.col;
1352      thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad);
1353      CHECK_BETTER
1354    }
1355  } else {
1356    for (i = 0; i < 6; i++) {
1357      this_mv.as_mv.row = br + hex[i].row;
1358      this_mv.as_mv.col = bc + hex[i].col;
1359      CHECK_POINT
1360      this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) + this_mv.as_mv.col;
1361      thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad);
1362      CHECK_BETTER
1363    }
1364  }
1365
1366  if (best_site == -1)
1367    goto cal_neighbors;
1368  else {
1369    br += hex[best_site].row;
1370    bc += hex[best_site].col;
1371    k = best_site;
1372  }
1373
1374  for (j = 1; j < 127; j++) {
1375    best_site = -1;
1376    CHECK_BOUNDS(2)
1377
1378    if (all_in) {
1379      for (i = 0; i < 3; i++) {
1380        this_mv.as_mv.row = br + next_chkpts[k][i].row;
1381        this_mv.as_mv.col = bc + next_chkpts[k][i].col;
1382        this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col;
1383        thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad);
1384        CHECK_BETTER
1385      }
1386    } else {
1387      for (i = 0; i < 3; i++) {
1388        this_mv.as_mv.row = br + next_chkpts[k][i].row;
1389        this_mv.as_mv.col = bc + next_chkpts[k][i].col;
1390        CHECK_POINT
1391        this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col;
1392        thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad);
1393        CHECK_BETTER
1394      }
1395    }
1396
1397    if (best_site == -1)
1398      break;
1399    else {
1400      br += next_chkpts[k][best_site].row;
1401      bc += next_chkpts[k][best_site].col;
1402      k += 5 + best_site;
1403      if (k >= 12) k -= 12;
1404      else if (k >= 6) k -= 6;
1405    }
1406  }
1407
1408  // check 4 1-away neighbors
1409cal_neighbors:
1410  for (j = 0; j < 32; j++) {
1411    best_site = -1;
1412    CHECK_BOUNDS(1)
1413
1414    if (all_in) {
1415      for (i = 0; i < 4; i++) {
1416        this_mv.as_mv.row = br + neighbors[i].row;
1417        this_mv.as_mv.col = bc + neighbors[i].col;
1418        this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col;
1419        thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad);
1420        CHECK_BETTER
1421      }
1422    } else {
1423      for (i = 0; i < 4; i++) {
1424        this_mv.as_mv.row = br + neighbors[i].row;
1425        this_mv.as_mv.col = bc + neighbors[i].col;
1426        CHECK_POINT
1427        this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col;
1428        thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad);
1429        CHECK_BETTER
1430      }
1431    }
1432
1433    if (best_site == -1)
1434      break;
1435    else {
1436      br += neighbors[best_site].row;
1437      bc += neighbors[best_site].col;
1438    }
1439  }
1440
1441  best_mv->as_mv.row = br;
1442  best_mv->as_mv.col = bc;
1443
1444  return bestsad;
1445}
1446#undef CHECK_BOUNDS
1447#undef CHECK_POINT
1448#undef CHECK_BETTER
1449
1450int vp9_diamond_search_sad_c(MACROBLOCK *x,
1451                             int_mv *ref_mv, int_mv *best_mv,
1452                             int search_param, int sad_per_bit, int *num00,
1453                             vp9_variance_fn_ptr_t *fn_ptr, int *mvjcost,
1454                             int *mvcost[2], int_mv *center_mv) {
1455  int i, j, step;
1456
1457  const MACROBLOCKD* const xd = &x->e_mbd;
1458  uint8_t *what = x->plane[0].src.buf;
1459  int what_stride = x->plane[0].src.stride;
1460  uint8_t *in_what;
1461  int in_what_stride = xd->plane[0].pre[0].stride;
1462  uint8_t *best_address;
1463
1464  int tot_steps;
1465  int_mv this_mv;
1466
1467  int bestsad = INT_MAX;
1468  int best_site = 0;
1469  int last_site = 0;
1470
1471  int ref_row, ref_col;
1472  int this_row_offset, this_col_offset;
1473  search_site *ss;
1474
1475  uint8_t *check_here;
1476  int thissad;
1477  int_mv fcenter_mv;
1478
1479  int *mvjsadcost = x->nmvjointsadcost;
1480  int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
1481
1482  fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
1483  fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
1484
1485  clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
1486  ref_row = ref_mv->as_mv.row;
1487  ref_col = ref_mv->as_mv.col;
1488  *num00 = 0;
1489  best_mv->as_mv.row = ref_row;
1490  best_mv->as_mv.col = ref_col;
1491
1492  // Work out the start point for the search
1493  in_what = (uint8_t *)(xd->plane[0].pre[0].buf +
1494                        (ref_row * (xd->plane[0].pre[0].stride)) + ref_col);
1495  best_address = in_what;
1496
1497  // Check the starting position
1498  bestsad = fn_ptr->sdf(what, what_stride, in_what,
1499                        in_what_stride, 0x7fffffff)
1500            + mvsad_err_cost(best_mv, &fcenter_mv, mvjsadcost, mvsadcost,
1501                             sad_per_bit);
1502
1503  // search_param determines the length of the initial step and hence the number of iterations
1504  // 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc.
1505  ss = &x->ss[search_param * x->searches_per_step];
1506  tot_steps = (x->ss_count / x->searches_per_step) - search_param;
1507
1508  i = 1;
1509
1510  for (step = 0; step < tot_steps; step++) {
1511    for (j = 0; j < x->searches_per_step; j++) {
1512      // Trap illegal vectors
1513      this_row_offset = best_mv->as_mv.row + ss[i].mv.row;
1514      this_col_offset = best_mv->as_mv.col + ss[i].mv.col;
1515
1516      if ((this_col_offset > x->mv_col_min) &&
1517          (this_col_offset < x->mv_col_max) &&
1518          (this_row_offset > x->mv_row_min) &&
1519          (this_row_offset < x->mv_row_max)) {
1520        check_here = ss[i].offset + best_address;
1521        thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride,
1522                              bestsad);
1523
1524        if (thissad < bestsad) {
1525          this_mv.as_mv.row = this_row_offset;
1526          this_mv.as_mv.col = this_col_offset;
1527          thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
1528                                    mvjsadcost, mvsadcost, sad_per_bit);
1529
1530          if (thissad < bestsad) {
1531            bestsad = thissad;
1532            best_site = i;
1533          }
1534        }
1535      }
1536
1537      i++;
1538    }
1539
1540    if (best_site != last_site) {
1541      best_mv->as_mv.row += ss[best_site].mv.row;
1542      best_mv->as_mv.col += ss[best_site].mv.col;
1543      best_address += ss[best_site].offset;
1544      last_site = best_site;
1545#if defined(NEW_DIAMOND_SEARCH)
1546      while (1) {
1547        this_row_offset = best_mv->as_mv.row + ss[best_site].mv.row;
1548        this_col_offset = best_mv->as_mv.col + ss[best_site].mv.col;
1549        if ((this_col_offset > x->mv_col_min) &&
1550            (this_col_offset < x->mv_col_max) &&
1551            (this_row_offset > x->mv_row_min) &&
1552            (this_row_offset < x->mv_row_max)) {
1553          check_here = ss[best_site].offset + best_address;
1554          thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride,
1555                                bestsad);
1556          if (thissad < bestsad) {
1557            this_mv.as_mv.row = this_row_offset;
1558            this_mv.as_mv.col = this_col_offset;
1559            thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
1560                                      mvjsadcost, mvsadcost, sad_per_bit);
1561            if (thissad < bestsad) {
1562              bestsad = thissad;
1563              best_mv->as_mv.row += ss[best_site].mv.row;
1564              best_mv->as_mv.col += ss[best_site].mv.col;
1565              best_address += ss[best_site].offset;
1566              continue;
1567            }
1568          }
1569        }
1570        break;
1571      };
1572#endif
1573    } else if (best_address == in_what)
1574      (*num00)++;
1575  }
1576
1577  this_mv.as_mv.row = best_mv->as_mv.row << 3;
1578  this_mv.as_mv.col = best_mv->as_mv.col << 3;
1579
1580  if (bestsad == INT_MAX)
1581    return INT_MAX;
1582
1583  return
1584      fn_ptr->vf(what, what_stride, best_address, in_what_stride,
1585                 (unsigned int *)(&thissad)) +
1586      mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit,
1587                  xd->allow_high_precision_mv);
1588}
1589
1590int vp9_diamond_search_sadx4(MACROBLOCK *x,
1591                             int_mv *ref_mv, int_mv *best_mv, int search_param,
1592                             int sad_per_bit, int *num00,
1593                             vp9_variance_fn_ptr_t *fn_ptr,
1594                             int *mvjcost, int *mvcost[2], int_mv *center_mv) {
1595  int i, j, step;
1596
1597  const MACROBLOCKD* const xd = &x->e_mbd;
1598  uint8_t *what = x->plane[0].src.buf;
1599  int what_stride = x->plane[0].src.stride;
1600  uint8_t *in_what;
1601  int in_what_stride = xd->plane[0].pre[0].stride;
1602  uint8_t *best_address;
1603
1604  int tot_steps;
1605  int_mv this_mv;
1606
1607  unsigned int bestsad = INT_MAX;
1608  int best_site = 0;
1609  int last_site = 0;
1610
1611  int ref_row;
1612  int ref_col;
1613  int this_row_offset;
1614  int this_col_offset;
1615  search_site *ss;
1616
1617  uint8_t *check_here;
1618  unsigned int thissad;
1619  int_mv fcenter_mv;
1620
1621  int *mvjsadcost = x->nmvjointsadcost;
1622  int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
1623
1624  fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
1625  fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
1626
1627  clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
1628  ref_row = ref_mv->as_mv.row;
1629  ref_col = ref_mv->as_mv.col;
1630  *num00 = 0;
1631  best_mv->as_mv.row = ref_row;
1632  best_mv->as_mv.col = ref_col;
1633
1634  // Work out the start point for the search
1635  in_what = (uint8_t *)(xd->plane[0].pre[0].buf +
1636                        (ref_row * (xd->plane[0].pre[0].stride)) + ref_col);
1637  best_address = in_what;
1638
1639  // Check the starting position
1640  bestsad = fn_ptr->sdf(what, what_stride,
1641                        in_what, in_what_stride, 0x7fffffff)
1642            + mvsad_err_cost(best_mv, &fcenter_mv, mvjsadcost, mvsadcost,
1643                             sad_per_bit);
1644
1645  // search_param determines the length of the initial step and hence the number of iterations
1646  // 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc.
1647  ss = &x->ss[search_param * x->searches_per_step];
1648  tot_steps = (x->ss_count / x->searches_per_step) - search_param;
1649
1650  i = 1;
1651
1652  for (step = 0; step < tot_steps; step++) {
1653    int all_in = 1, t;
1654
1655    // To know if all neighbor points are within the bounds, 4 bounds checking are enough instead of
1656    // checking 4 bounds for each points.
1657    all_in &= ((best_mv->as_mv.row + ss[i].mv.row) > x->mv_row_min);
1658    all_in &= ((best_mv->as_mv.row + ss[i + 1].mv.row) < x->mv_row_max);
1659    all_in &= ((best_mv->as_mv.col + ss[i + 2].mv.col) > x->mv_col_min);
1660    all_in &= ((best_mv->as_mv.col + ss[i + 3].mv.col) < x->mv_col_max);
1661
1662    if (all_in) {
1663      unsigned int sad_array[4];
1664
1665      for (j = 0; j < x->searches_per_step; j += 4) {
1666        unsigned char const *block_offset[4];
1667
1668        for (t = 0; t < 4; t++)
1669          block_offset[t] = ss[i + t].offset + best_address;
1670
1671        fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride,
1672                       sad_array);
1673
1674        for (t = 0; t < 4; t++, i++) {
1675          if (sad_array[t] < bestsad) {
1676            this_mv.as_mv.row = best_mv->as_mv.row + ss[i].mv.row;
1677            this_mv.as_mv.col = best_mv->as_mv.col + ss[i].mv.col;
1678            sad_array[t] += mvsad_err_cost(&this_mv, &fcenter_mv,
1679                                           mvjsadcost, mvsadcost, sad_per_bit);
1680
1681            if (sad_array[t] < bestsad) {
1682              bestsad = sad_array[t];
1683              best_site = i;
1684            }
1685          }
1686        }
1687      }
1688    } else {
1689      for (j = 0; j < x->searches_per_step; j++) {
1690        // Trap illegal vectors
1691        this_row_offset = best_mv->as_mv.row + ss[i].mv.row;
1692        this_col_offset = best_mv->as_mv.col + ss[i].mv.col;
1693
1694        if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) &&
1695            (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max)) {
1696          check_here = ss[i].offset + best_address;
1697          thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad);
1698
1699          if (thissad < bestsad) {
1700            this_mv.as_mv.row = this_row_offset;
1701            this_mv.as_mv.col = this_col_offset;
1702            thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
1703                                      mvjsadcost, mvsadcost, sad_per_bit);
1704
1705            if (thissad < bestsad) {
1706              bestsad = thissad;
1707              best_site = i;
1708            }
1709          }
1710        }
1711        i++;
1712      }
1713    }
1714    if (best_site != last_site) {
1715      best_mv->as_mv.row += ss[best_site].mv.row;
1716      best_mv->as_mv.col += ss[best_site].mv.col;
1717      best_address += ss[best_site].offset;
1718      last_site = best_site;
1719#if defined(NEW_DIAMOND_SEARCH)
1720      while (1) {
1721        this_row_offset = best_mv->as_mv.row + ss[best_site].mv.row;
1722        this_col_offset = best_mv->as_mv.col + ss[best_site].mv.col;
1723        if ((this_col_offset > x->mv_col_min) &&
1724            (this_col_offset < x->mv_col_max) &&
1725            (this_row_offset > x->mv_row_min) &&
1726            (this_row_offset < x->mv_row_max)) {
1727          check_here = ss[best_site].offset + best_address;
1728          thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride,
1729                                bestsad);
1730          if (thissad < bestsad) {
1731            this_mv.as_mv.row = this_row_offset;
1732            this_mv.as_mv.col = this_col_offset;
1733            thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
1734                                      mvjsadcost, mvsadcost, sad_per_bit);
1735            if (thissad < bestsad) {
1736              bestsad = thissad;
1737              best_mv->as_mv.row += ss[best_site].mv.row;
1738              best_mv->as_mv.col += ss[best_site].mv.col;
1739              best_address += ss[best_site].offset;
1740              continue;
1741            }
1742          }
1743        }
1744        break;
1745      };
1746#endif
1747    } else if (best_address == in_what)
1748      (*num00)++;
1749  }
1750
1751  this_mv.as_mv.row = best_mv->as_mv.row << 3;
1752  this_mv.as_mv.col = best_mv->as_mv.col << 3;
1753
1754  if (bestsad == INT_MAX)
1755    return INT_MAX;
1756
1757  return
1758      fn_ptr->vf(what, what_stride, best_address, in_what_stride,
1759                 (unsigned int *)(&thissad)) +
1760      mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit,
1761                  xd->allow_high_precision_mv);
1762}
1763
1764/* do_refine: If last step (1-away) of n-step search doesn't pick the center
1765              point as the best match, we will do a final 1-away diamond
1766              refining search  */
1767
1768int vp9_full_pixel_diamond(VP9_COMP *cpi, MACROBLOCK *x,
1769                           int_mv *mvp_full, int step_param,
1770                           int sadpb, int further_steps,
1771                           int do_refine, vp9_variance_fn_ptr_t *fn_ptr,
1772                           int_mv *ref_mv, int_mv *dst_mv) {
1773  int_mv temp_mv;
1774  int thissme, n, num00;
1775  int bestsme = cpi->diamond_search_sad(x, mvp_full, &temp_mv,
1776                                        step_param, sadpb, &num00,
1777                                        fn_ptr, x->nmvjointcost,
1778                                        x->mvcost, ref_mv);
1779  dst_mv->as_int = temp_mv.as_int;
1780
1781  n = num00;
1782  num00 = 0;
1783
1784  /* If there won't be more n-step search, check to see if refining search is needed. */
1785  if (n > further_steps)
1786    do_refine = 0;
1787
1788  while (n < further_steps) {
1789    n++;
1790
1791    if (num00)
1792      num00--;
1793    else {
1794      thissme = cpi->diamond_search_sad(x, mvp_full, &temp_mv,
1795                                        step_param + n, sadpb, &num00,
1796                                        fn_ptr, x->nmvjointcost, x->mvcost,
1797                                        ref_mv);
1798
1799      /* check to see if refining search is needed. */
1800      if (num00 > (further_steps - n))
1801        do_refine = 0;
1802
1803      if (thissme < bestsme) {
1804        bestsme = thissme;
1805        dst_mv->as_int = temp_mv.as_int;
1806      }
1807    }
1808  }
1809
1810  /* final 1-away diamond refining search */
1811  if (do_refine == 1) {
1812    int search_range = 8;
1813    int_mv best_mv;
1814    best_mv.as_int = dst_mv->as_int;
1815    thissme = cpi->refining_search_sad(x, &best_mv, sadpb, search_range,
1816                                       fn_ptr, x->nmvjointcost, x->mvcost,
1817                                       ref_mv);
1818
1819    if (thissme < bestsme) {
1820      bestsme = thissme;
1821      dst_mv->as_int = best_mv.as_int;
1822    }
1823  }
1824  return bestsme;
1825}
1826
1827int vp9_full_search_sad_c(MACROBLOCK *x, int_mv *ref_mv,
1828                          int sad_per_bit, int distance,
1829                          vp9_variance_fn_ptr_t *fn_ptr, int *mvjcost,
1830                          int *mvcost[2],
1831                          int_mv *center_mv, int n) {
1832  const MACROBLOCKD* const xd = &x->e_mbd;
1833  uint8_t *what = x->plane[0].src.buf;
1834  int what_stride = x->plane[0].src.stride;
1835  uint8_t *in_what;
1836  int in_what_stride = xd->plane[0].pre[0].stride;
1837  int mv_stride = xd->plane[0].pre[0].stride;
1838  uint8_t *bestaddress;
1839  int_mv *best_mv = &x->e_mbd.mode_info_context->bmi[n].as_mv[0];
1840  int_mv this_mv;
1841  int bestsad = INT_MAX;
1842  int r, c;
1843
1844  uint8_t *check_here;
1845  int thissad;
1846
1847  int ref_row = ref_mv->as_mv.row;
1848  int ref_col = ref_mv->as_mv.col;
1849
1850  int row_min = ref_row - distance;
1851  int row_max = ref_row + distance;
1852  int col_min = ref_col - distance;
1853  int col_max = ref_col + distance;
1854  int_mv fcenter_mv;
1855
1856  int *mvjsadcost = x->nmvjointsadcost;
1857  int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
1858
1859  fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
1860  fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
1861
1862  // Work out the mid point for the search
1863  in_what = xd->plane[0].pre[0].buf;
1864  bestaddress = in_what + (ref_row * xd->plane[0].pre[0].stride) + ref_col;
1865
1866  best_mv->as_mv.row = ref_row;
1867  best_mv->as_mv.col = ref_col;
1868
1869  // Baseline value at the centre
1870  bestsad = fn_ptr->sdf(what, what_stride, bestaddress,
1871                        in_what_stride, 0x7fffffff)
1872            + mvsad_err_cost(best_mv, &fcenter_mv, mvjsadcost, mvsadcost,
1873                             sad_per_bit);
1874
1875  // Apply further limits to prevent us looking using vectors that stretch beyiond the UMV border
1876  if (col_min < x->mv_col_min)
1877    col_min = x->mv_col_min;
1878
1879  if (col_max > x->mv_col_max)
1880    col_max = x->mv_col_max;
1881
1882  if (row_min < x->mv_row_min)
1883    row_min = x->mv_row_min;
1884
1885  if (row_max > x->mv_row_max)
1886    row_max = x->mv_row_max;
1887
1888  for (r = row_min; r < row_max; r++) {
1889    this_mv.as_mv.row = r;
1890    check_here = r * mv_stride + in_what + col_min;
1891
1892    for (c = col_min; c < col_max; c++) {
1893      thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad);
1894
1895      this_mv.as_mv.col = c;
1896      thissad  += mvsad_err_cost(&this_mv, &fcenter_mv,
1897                                 mvjsadcost, mvsadcost, sad_per_bit);
1898
1899      if (thissad < bestsad) {
1900        bestsad = thissad;
1901        best_mv->as_mv.row = r;
1902        best_mv->as_mv.col = c;
1903        bestaddress = check_here;
1904      }
1905
1906      check_here++;
1907    }
1908  }
1909
1910  this_mv.as_mv.row = best_mv->as_mv.row << 3;
1911  this_mv.as_mv.col = best_mv->as_mv.col << 3;
1912
1913  if (bestsad < INT_MAX)
1914    return
1915        fn_ptr->vf(what, what_stride, bestaddress, in_what_stride,
1916                   (unsigned int *)(&thissad)) +
1917        mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit,
1918                    xd->allow_high_precision_mv);
1919  else
1920    return INT_MAX;
1921}
1922
1923int vp9_full_search_sadx3(MACROBLOCK *x, int_mv *ref_mv,
1924                          int sad_per_bit, int distance,
1925                          vp9_variance_fn_ptr_t *fn_ptr, int *mvjcost,
1926                          int *mvcost[2], int_mv *center_mv, int n) {
1927  const MACROBLOCKD* const xd = &x->e_mbd;
1928  uint8_t *what = x->plane[0].src.buf;
1929  int what_stride = x->plane[0].src.stride;
1930  uint8_t *in_what;
1931  int in_what_stride = xd->plane[0].pre[0].stride;
1932  int mv_stride = xd->plane[0].pre[0].stride;
1933  uint8_t *bestaddress;
1934  int_mv *best_mv = &x->e_mbd.mode_info_context->bmi[n].as_mv[0];
1935  int_mv this_mv;
1936  unsigned int bestsad = INT_MAX;
1937  int r, c;
1938
1939  uint8_t *check_here;
1940  unsigned int thissad;
1941
1942  int ref_row = ref_mv->as_mv.row;
1943  int ref_col = ref_mv->as_mv.col;
1944
1945  int row_min = ref_row - distance;
1946  int row_max = ref_row + distance;
1947  int col_min = ref_col - distance;
1948  int col_max = ref_col + distance;
1949
1950  unsigned int sad_array[3];
1951  int_mv fcenter_mv;
1952
1953  int *mvjsadcost = x->nmvjointsadcost;
1954  int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
1955
1956  fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
1957  fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
1958
1959  // Work out the mid point for the search
1960  in_what = xd->plane[0].pre[0].buf;
1961  bestaddress = in_what + (ref_row * xd->plane[0].pre[0].stride) + ref_col;
1962
1963  best_mv->as_mv.row = ref_row;
1964  best_mv->as_mv.col = ref_col;
1965
1966  // Baseline value at the centre
1967  bestsad = fn_ptr->sdf(what, what_stride,
1968                        bestaddress, in_what_stride, 0x7fffffff)
1969            + mvsad_err_cost(best_mv, &fcenter_mv, mvjsadcost, mvsadcost,
1970                             sad_per_bit);
1971
1972  // Apply further limits to prevent us looking using vectors that stretch beyiond the UMV border
1973  if (col_min < x->mv_col_min)
1974    col_min = x->mv_col_min;
1975
1976  if (col_max > x->mv_col_max)
1977    col_max = x->mv_col_max;
1978
1979  if (row_min < x->mv_row_min)
1980    row_min = x->mv_row_min;
1981
1982  if (row_max > x->mv_row_max)
1983    row_max = x->mv_row_max;
1984
1985  for (r = row_min; r < row_max; r++) {
1986    this_mv.as_mv.row = r;
1987    check_here = r * mv_stride + in_what + col_min;
1988    c = col_min;
1989
1990    while ((c + 2) < col_max) {
1991      int i;
1992
1993      fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array);
1994
1995      for (i = 0; i < 3; i++) {
1996        thissad = sad_array[i];
1997
1998        if (thissad < bestsad) {
1999          this_mv.as_mv.col = c;
2000          thissad  += mvsad_err_cost(&this_mv, &fcenter_mv,
2001                                     mvjsadcost, mvsadcost, sad_per_bit);
2002
2003          if (thissad < bestsad) {
2004            bestsad = thissad;
2005            best_mv->as_mv.row = r;
2006            best_mv->as_mv.col = c;
2007            bestaddress = check_here;
2008          }
2009        }
2010
2011        check_here++;
2012        c++;
2013      }
2014    }
2015
2016    while (c < col_max) {
2017      thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad);
2018
2019      if (thissad < bestsad) {
2020        this_mv.as_mv.col = c;
2021        thissad  += mvsad_err_cost(&this_mv, &fcenter_mv,
2022                                   mvjsadcost, mvsadcost, sad_per_bit);
2023
2024        if (thissad < bestsad) {
2025          bestsad = thissad;
2026          best_mv->as_mv.row = r;
2027          best_mv->as_mv.col = c;
2028          bestaddress = check_here;
2029        }
2030      }
2031
2032      check_here++;
2033      c++;
2034    }
2035
2036  }
2037
2038  this_mv.as_mv.row = best_mv->as_mv.row << 3;
2039  this_mv.as_mv.col = best_mv->as_mv.col << 3;
2040
2041  if (bestsad < INT_MAX)
2042    return
2043        fn_ptr->vf(what, what_stride, bestaddress, in_what_stride,
2044                   (unsigned int *)(&thissad)) +
2045        mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit,
2046                    xd->allow_high_precision_mv);
2047  else
2048    return INT_MAX;
2049}
2050
2051int vp9_full_search_sadx8(MACROBLOCK *x, int_mv *ref_mv,
2052                          int sad_per_bit, int distance,
2053                          vp9_variance_fn_ptr_t *fn_ptr,
2054                          int *mvjcost, int *mvcost[2],
2055                          int_mv *center_mv, int n) {
2056  const MACROBLOCKD* const xd = &x->e_mbd;
2057  uint8_t *what = x->plane[0].src.buf;
2058  int what_stride = x->plane[0].src.stride;
2059  uint8_t *in_what;
2060  int in_what_stride = xd->plane[0].pre[0].stride;
2061  int mv_stride = xd->plane[0].pre[0].stride;
2062  uint8_t *bestaddress;
2063  int_mv *best_mv = &x->e_mbd.mode_info_context->bmi[n].as_mv[0];
2064  int_mv this_mv;
2065  unsigned int bestsad = INT_MAX;
2066  int r, c;
2067
2068  uint8_t *check_here;
2069  unsigned int thissad;
2070
2071  int ref_row = ref_mv->as_mv.row;
2072  int ref_col = ref_mv->as_mv.col;
2073
2074  int row_min = ref_row - distance;
2075  int row_max = ref_row + distance;
2076  int col_min = ref_col - distance;
2077  int col_max = ref_col + distance;
2078
2079  DECLARE_ALIGNED_ARRAY(16, uint32_t, sad_array8, 8);
2080  unsigned int sad_array[3];
2081  int_mv fcenter_mv;
2082
2083  int *mvjsadcost = x->nmvjointsadcost;
2084  int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
2085
2086  fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
2087  fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
2088
2089  // Work out the mid point for the search
2090  in_what = xd->plane[0].pre[0].buf;
2091  bestaddress = in_what + (ref_row * xd->plane[0].pre[0].stride) + ref_col;
2092
2093  best_mv->as_mv.row = ref_row;
2094  best_mv->as_mv.col = ref_col;
2095
2096  // Baseline value at the centre
2097  bestsad = fn_ptr->sdf(what, what_stride,
2098                        bestaddress, in_what_stride, 0x7fffffff)
2099            + mvsad_err_cost(best_mv, &fcenter_mv, mvjsadcost, mvsadcost,
2100                             sad_per_bit);
2101
2102  // Apply further limits to prevent us looking using vectors that stretch beyiond the UMV border
2103  if (col_min < x->mv_col_min)
2104    col_min = x->mv_col_min;
2105
2106  if (col_max > x->mv_col_max)
2107    col_max = x->mv_col_max;
2108
2109  if (row_min < x->mv_row_min)
2110    row_min = x->mv_row_min;
2111
2112  if (row_max > x->mv_row_max)
2113    row_max = x->mv_row_max;
2114
2115  for (r = row_min; r < row_max; r++) {
2116    this_mv.as_mv.row = r;
2117    check_here = r * mv_stride + in_what + col_min;
2118    c = col_min;
2119
2120    while ((c + 7) < col_max) {
2121      int i;
2122
2123      fn_ptr->sdx8f(what, what_stride, check_here, in_what_stride, sad_array8);
2124
2125      for (i = 0; i < 8; i++) {
2126        thissad = (unsigned int)sad_array8[i];
2127
2128        if (thissad < bestsad) {
2129          this_mv.as_mv.col = c;
2130          thissad  += mvsad_err_cost(&this_mv, &fcenter_mv,
2131                                     mvjsadcost, mvsadcost, sad_per_bit);
2132
2133          if (thissad < bestsad) {
2134            bestsad = thissad;
2135            best_mv->as_mv.row = r;
2136            best_mv->as_mv.col = c;
2137            bestaddress = check_here;
2138          }
2139        }
2140
2141        check_here++;
2142        c++;
2143      }
2144    }
2145
2146    while ((c + 2) < col_max) {
2147      int i;
2148
2149      fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array);
2150
2151      for (i = 0; i < 3; i++) {
2152        thissad = sad_array[i];
2153
2154        if (thissad < bestsad) {
2155          this_mv.as_mv.col = c;
2156          thissad  += mvsad_err_cost(&this_mv, &fcenter_mv,
2157                                     mvjsadcost, mvsadcost, sad_per_bit);
2158
2159          if (thissad < bestsad) {
2160            bestsad = thissad;
2161            best_mv->as_mv.row = r;
2162            best_mv->as_mv.col = c;
2163            bestaddress = check_here;
2164          }
2165        }
2166
2167        check_here++;
2168        c++;
2169      }
2170    }
2171
2172    while (c < col_max) {
2173      thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad);
2174
2175      if (thissad < bestsad) {
2176        this_mv.as_mv.col = c;
2177        thissad  += mvsad_err_cost(&this_mv, &fcenter_mv,
2178                                   mvjsadcost, mvsadcost, sad_per_bit);
2179
2180        if (thissad < bestsad) {
2181          bestsad = thissad;
2182          best_mv->as_mv.row = r;
2183          best_mv->as_mv.col = c;
2184          bestaddress = check_here;
2185        }
2186      }
2187
2188      check_here++;
2189      c++;
2190    }
2191  }
2192
2193  this_mv.as_mv.row = best_mv->as_mv.row << 3;
2194  this_mv.as_mv.col = best_mv->as_mv.col << 3;
2195
2196  if (bestsad < INT_MAX)
2197    return
2198        fn_ptr->vf(what, what_stride, bestaddress, in_what_stride,
2199                   (unsigned int *)(&thissad)) +
2200        mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit,
2201                    xd->allow_high_precision_mv);
2202  else
2203    return INT_MAX;
2204}
2205int vp9_refining_search_sad_c(MACROBLOCK *x,
2206                              int_mv *ref_mv, int error_per_bit,
2207                              int search_range, vp9_variance_fn_ptr_t *fn_ptr,
2208                              int *mvjcost, int *mvcost[2], int_mv *center_mv) {
2209  const MACROBLOCKD* const xd = &x->e_mbd;
2210  MV neighbors[4] = {{ -1, 0}, {0, -1}, {0, 1}, {1, 0}};
2211  int i, j;
2212  int this_row_offset, this_col_offset;
2213
2214  int what_stride = x->plane[0].src.stride;
2215  int in_what_stride = xd->plane[0].pre[0].stride;
2216  uint8_t *what = x->plane[0].src.buf;
2217  uint8_t *best_address = xd->plane[0].pre[0].buf +
2218                          (ref_mv->as_mv.row * xd->plane[0].pre[0].stride) +
2219                          ref_mv->as_mv.col;
2220  uint8_t *check_here;
2221  unsigned int thissad;
2222  int_mv this_mv;
2223  unsigned int bestsad = INT_MAX;
2224  int_mv fcenter_mv;
2225
2226  int *mvjsadcost = x->nmvjointsadcost;
2227  int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
2228
2229  fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
2230  fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
2231
2232  bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride, 0x7fffffff) +
2233      mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit);
2234
2235  for (i = 0; i < search_range; i++) {
2236    int best_site = -1;
2237
2238    for (j = 0; j < 4; j++) {
2239      this_row_offset = ref_mv->as_mv.row + neighbors[j].row;
2240      this_col_offset = ref_mv->as_mv.col + neighbors[j].col;
2241
2242      if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) &&
2243          (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max)) {
2244        check_here = (neighbors[j].row) * in_what_stride + neighbors[j].col + best_address;
2245        thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad);
2246
2247        if (thissad < bestsad) {
2248          this_mv.as_mv.row = this_row_offset;
2249          this_mv.as_mv.col = this_col_offset;
2250          thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvjsadcost,
2251                                    mvsadcost, error_per_bit);
2252
2253          if (thissad < bestsad) {
2254            bestsad = thissad;
2255            best_site = j;
2256          }
2257        }
2258      }
2259    }
2260
2261    if (best_site == -1)
2262      break;
2263    else {
2264      ref_mv->as_mv.row += neighbors[best_site].row;
2265      ref_mv->as_mv.col += neighbors[best_site].col;
2266      best_address += (neighbors[best_site].row) * in_what_stride + neighbors[best_site].col;
2267    }
2268  }
2269
2270  this_mv.as_mv.row = ref_mv->as_mv.row << 3;
2271  this_mv.as_mv.col = ref_mv->as_mv.col << 3;
2272
2273  if (bestsad < INT_MAX)
2274    return
2275        fn_ptr->vf(what, what_stride, best_address, in_what_stride,
2276                   (unsigned int *)(&thissad)) +
2277        mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit,
2278                    xd->allow_high_precision_mv);
2279  else
2280    return INT_MAX;
2281}
2282
2283int vp9_refining_search_sadx4(MACROBLOCK *x,
2284                              int_mv *ref_mv, int error_per_bit,
2285                              int search_range, vp9_variance_fn_ptr_t *fn_ptr,
2286                              int *mvjcost, int *mvcost[2], int_mv *center_mv) {
2287  const MACROBLOCKD* const xd = &x->e_mbd;
2288  MV neighbors[4] = {{ -1, 0}, {0, -1}, {0, 1}, {1, 0}};
2289  int i, j;
2290  int this_row_offset, this_col_offset;
2291
2292  int what_stride = x->plane[0].src.stride;
2293  int in_what_stride = xd->plane[0].pre[0].stride;
2294  uint8_t *what = x->plane[0].src.buf;
2295  uint8_t *best_address = xd->plane[0].pre[0].buf +
2296                          (ref_mv->as_mv.row * xd->plane[0].pre[0].stride) +
2297                          ref_mv->as_mv.col;
2298  uint8_t *check_here;
2299  unsigned int thissad;
2300  int_mv this_mv;
2301  unsigned int bestsad = INT_MAX;
2302  int_mv fcenter_mv;
2303
2304  int *mvjsadcost = x->nmvjointsadcost;
2305  int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
2306
2307  fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
2308  fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
2309
2310  bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride, 0x7fffffff) +
2311      mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit);
2312
2313  for (i = 0; i < search_range; i++) {
2314    int best_site = -1;
2315    int all_in = ((ref_mv->as_mv.row - 1) > x->mv_row_min) &
2316                 ((ref_mv->as_mv.row + 1) < x->mv_row_max) &
2317                 ((ref_mv->as_mv.col - 1) > x->mv_col_min) &
2318                 ((ref_mv->as_mv.col + 1) < x->mv_col_max);
2319
2320    if (all_in) {
2321      unsigned int sad_array[4];
2322      unsigned char const *block_offset[4];
2323      block_offset[0] = best_address - in_what_stride;
2324      block_offset[1] = best_address - 1;
2325      block_offset[2] = best_address + 1;
2326      block_offset[3] = best_address + in_what_stride;
2327
2328      fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, sad_array);
2329
2330      for (j = 0; j < 4; j++) {
2331        if (sad_array[j] < bestsad) {
2332          this_mv.as_mv.row = ref_mv->as_mv.row + neighbors[j].row;
2333          this_mv.as_mv.col = ref_mv->as_mv.col + neighbors[j].col;
2334          sad_array[j] += mvsad_err_cost(&this_mv, &fcenter_mv, mvjsadcost,
2335                                         mvsadcost, error_per_bit);
2336
2337          if (sad_array[j] < bestsad) {
2338            bestsad = sad_array[j];
2339            best_site = j;
2340          }
2341        }
2342      }
2343    } else {
2344      for (j = 0; j < 4; j++) {
2345        this_row_offset = ref_mv->as_mv.row + neighbors[j].row;
2346        this_col_offset = ref_mv->as_mv.col + neighbors[j].col;
2347
2348        if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) &&
2349            (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max)) {
2350          check_here = (neighbors[j].row) * in_what_stride + neighbors[j].col + best_address;
2351          thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad);
2352
2353          if (thissad < bestsad) {
2354            this_mv.as_mv.row = this_row_offset;
2355            this_mv.as_mv.col = this_col_offset;
2356            thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvjsadcost,
2357                                      mvsadcost, error_per_bit);
2358
2359            if (thissad < bestsad) {
2360              bestsad = thissad;
2361              best_site = j;
2362            }
2363          }
2364        }
2365      }
2366    }
2367
2368    if (best_site == -1)
2369      break;
2370    else {
2371      ref_mv->as_mv.row += neighbors[best_site].row;
2372      ref_mv->as_mv.col += neighbors[best_site].col;
2373      best_address += (neighbors[best_site].row) * in_what_stride + neighbors[best_site].col;
2374    }
2375  }
2376
2377  this_mv.as_mv.row = ref_mv->as_mv.row << 3;
2378  this_mv.as_mv.col = ref_mv->as_mv.col << 3;
2379
2380  if (bestsad < INT_MAX)
2381    return
2382        fn_ptr->vf(what, what_stride, best_address, in_what_stride,
2383                   (unsigned int *)(&thissad)) +
2384        mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit,
2385                    xd->allow_high_precision_mv);
2386  else
2387    return INT_MAX;
2388}
2389
2390/* This function is called when we do joint motion search in comp_inter_inter
2391 * mode.
2392 */
2393int vp9_refining_search_8p_c(MACROBLOCK *x,
2394                             int_mv *ref_mv, int error_per_bit,
2395                             int search_range, vp9_variance_fn_ptr_t *fn_ptr,
2396                             int *mvjcost, int *mvcost[2], int_mv *center_mv,
2397                             const uint8_t *second_pred, int w, int h) {
2398  const MACROBLOCKD* const xd = &x->e_mbd;
2399  MV neighbors[8] = {{-1, 0}, {0, -1}, {0, 1}, {1, 0},
2400      {-1, -1}, {1, -1}, {-1, 1}, {1, 1}};
2401  int i, j;
2402  int this_row_offset, this_col_offset;
2403
2404  int what_stride = x->plane[0].src.stride;
2405  int in_what_stride = xd->plane[0].pre[0].stride;
2406  uint8_t *what = x->plane[0].src.buf;
2407  uint8_t *best_address = xd->plane[0].pre[0].buf +
2408                          (ref_mv->as_mv.row * xd->plane[0].pre[0].stride) +
2409                          ref_mv->as_mv.col;
2410  uint8_t *check_here;
2411  unsigned int thissad;
2412  int_mv this_mv;
2413  unsigned int bestsad = INT_MAX;
2414  int_mv fcenter_mv;
2415
2416  int *mvjsadcost = x->nmvjointsadcost;
2417  int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
2418
2419  fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
2420  fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
2421
2422  /* Get compound pred by averaging two pred blocks. */
2423  bestsad = fn_ptr->sdaf(what, what_stride, best_address, in_what_stride,
2424                         second_pred, 0x7fffffff) +
2425      mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit);
2426
2427  for (i = 0; i < search_range; i++) {
2428    int best_site = -1;
2429
2430    for (j = 0; j < 8; j++) {
2431      this_row_offset = ref_mv->as_mv.row + neighbors[j].row;
2432      this_col_offset = ref_mv->as_mv.col + neighbors[j].col;
2433
2434      if ((this_col_offset > x->mv_col_min) &&
2435          (this_col_offset < x->mv_col_max) &&
2436          (this_row_offset > x->mv_row_min) &&
2437          (this_row_offset < x->mv_row_max)) {
2438        check_here = (neighbors[j].row) * in_what_stride + neighbors[j].col +
2439            best_address;
2440
2441        /* Get compound block and use it to calculate SAD. */
2442        thissad = fn_ptr->sdaf(what, what_stride, check_here, in_what_stride,
2443                               second_pred, bestsad);
2444
2445        if (thissad < bestsad) {
2446          this_mv.as_mv.row = this_row_offset;
2447          this_mv.as_mv.col = this_col_offset;
2448          thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvjsadcost,
2449                                    mvsadcost, error_per_bit);
2450
2451          if (thissad < bestsad) {
2452            bestsad = thissad;
2453            best_site = j;
2454          }
2455        }
2456      }
2457    }
2458
2459    if (best_site == -1) {
2460      break;
2461    } else {
2462      ref_mv->as_mv.row += neighbors[best_site].row;
2463      ref_mv->as_mv.col += neighbors[best_site].col;
2464      best_address += (neighbors[best_site].row) * in_what_stride +
2465          neighbors[best_site].col;
2466    }
2467  }
2468
2469  this_mv.as_mv.row = ref_mv->as_mv.row << 3;
2470  this_mv.as_mv.col = ref_mv->as_mv.col << 3;
2471
2472  if (bestsad < INT_MAX) {
2473    // FIXME(rbultje, yunqing): add full-pixel averaging variance functions
2474    // so we don't have to use the subpixel with xoff=0,yoff=0 here.
2475    int besterr = fn_ptr->svaf(best_address, in_what_stride, 0, 0,
2476                               what, what_stride, (unsigned int *)(&thissad),
2477                               second_pred) +
2478        mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit,
2479                    xd->allow_high_precision_mv);
2480    return besterr;
2481  } else {
2482    return INT_MAX;
2483  }
2484}
2485