1/*
2 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 *
4 *  Use of this source code is governed by a BSD-style license
5 *  that can be found in the LICENSE file in the root of the source
6 *  tree. An additional intellectual property rights grant can be found
7 *  in the file PATENTS.  All contributing project authors may
8 *  be found in the AUTHORS file in the root of the source tree.
9 */
10
11#include <limits.h>
12#include <math.h>
13#include <stdio.h>
14
15#include "./vpx_config.h"
16
17#include "vpx_mem/vpx_mem.h"
18
19#include "vp9/common/vp9_common.h"
20
21#include "vp9/encoder/vp9_encoder.h"
22#include "vp9/encoder/vp9_mcomp.h"
23
24// #define NEW_DIAMOND_SEARCH
25
26static INLINE const uint8_t *get_buf_from_mv(const struct buf_2d *buf,
27                                             const MV *mv) {
28  return &buf->buf[mv->row * buf->stride + mv->col];
29}
30
31void vp9_set_mv_search_range(MACROBLOCK *x, const MV *mv) {
32  int col_min = (mv->col >> 3) - MAX_FULL_PEL_VAL + (mv->col & 7 ? 1 : 0);
33  int row_min = (mv->row >> 3) - MAX_FULL_PEL_VAL + (mv->row & 7 ? 1 : 0);
34  int col_max = (mv->col >> 3) + MAX_FULL_PEL_VAL;
35  int row_max = (mv->row >> 3) + MAX_FULL_PEL_VAL;
36
37  col_min = MAX(col_min, (MV_LOW >> 3) + 1);
38  row_min = MAX(row_min, (MV_LOW >> 3) + 1);
39  col_max = MIN(col_max, (MV_UPP >> 3) - 1);
40  row_max = MIN(row_max, (MV_UPP >> 3) - 1);
41
42  // Get intersection of UMV window and valid MV window to reduce # of checks
43  // in diamond search.
44  if (x->mv_col_min < col_min)
45    x->mv_col_min = col_min;
46  if (x->mv_col_max > col_max)
47    x->mv_col_max = col_max;
48  if (x->mv_row_min < row_min)
49    x->mv_row_min = row_min;
50  if (x->mv_row_max > row_max)
51    x->mv_row_max = row_max;
52}
53
54int vp9_init_search_range(int size) {
55  int sr = 0;
56  // Minimum search size no matter what the passed in value.
57  size = MAX(16, size);
58
59  while ((size << sr) < MAX_FULL_PEL_VAL)
60    sr++;
61
62  sr = MIN(sr, MAX_MVSEARCH_STEPS - 2);
63  return sr;
64}
65
66static INLINE int mv_cost(const MV *mv,
67                          const int *joint_cost, int *const comp_cost[2]) {
68  return joint_cost[vp9_get_mv_joint(mv)] +
69             comp_cost[0][mv->row] + comp_cost[1][mv->col];
70}
71
72int vp9_mv_bit_cost(const MV *mv, const MV *ref,
73                    const int *mvjcost, int *mvcost[2], int weight) {
74  const MV diff = { mv->row - ref->row,
75                    mv->col - ref->col };
76  return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjcost, mvcost) * weight, 7);
77}
78
79static int mv_err_cost(const MV *mv, const MV *ref,
80                       const int *mvjcost, int *mvcost[2],
81                       int error_per_bit) {
82  if (mvcost) {
83    const MV diff = { mv->row - ref->row,
84                      mv->col - ref->col };
85    return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjcost, mvcost) *
86                                  error_per_bit, 13);
87  }
88  return 0;
89}
90
91static int mvsad_err_cost(const MACROBLOCK *x, const MV *mv, const MV *ref,
92                          int error_per_bit) {
93  if (x->nmvsadcost) {
94    const MV diff = { mv->row - ref->row,
95                      mv->col - ref->col };
96    return ROUND_POWER_OF_TWO(mv_cost(&diff, x->nmvjointsadcost,
97                                      x->nmvsadcost) * error_per_bit, 8);
98  }
99  return 0;
100}
101
102void vp9_init_dsmotion_compensation(search_site_config *cfg, int stride) {
103  int len, ss_count = 1;
104
105  cfg->ss[0].mv.col = cfg->ss[0].mv.row = 0;
106  cfg->ss[0].offset = 0;
107
108  for (len = MAX_FIRST_STEP; len > 0; len /= 2) {
109    // Generate offsets for 4 search sites per step.
110    const MV ss_mvs[] = {{-len, 0}, {len, 0}, {0, -len}, {0, len}};
111    int i;
112    for (i = 0; i < 4; ++i) {
113      search_site *const ss = &cfg->ss[ss_count++];
114      ss->mv = ss_mvs[i];
115      ss->offset = ss->mv.row * stride + ss->mv.col;
116    }
117  }
118
119  cfg->ss_count = ss_count;
120  cfg->searches_per_step = 4;
121}
122
123void vp9_init3smotion_compensation(search_site_config *cfg, int stride) {
124  int len, ss_count = 1;
125
126  cfg->ss[0].mv.col = cfg->ss[0].mv.row = 0;
127  cfg->ss[0].offset = 0;
128
129  for (len = MAX_FIRST_STEP; len > 0; len /= 2) {
130    // Generate offsets for 8 search sites per step.
131    const MV ss_mvs[8] = {
132      {-len,  0  }, {len,  0  }, { 0,   -len}, {0,    len},
133      {-len, -len}, {-len, len}, {len,  -len}, {len,  len}
134    };
135    int i;
136    for (i = 0; i < 8; ++i) {
137      search_site *const ss = &cfg->ss[ss_count++];
138      ss->mv = ss_mvs[i];
139      ss->offset = ss->mv.row * stride + ss->mv.col;
140    }
141  }
142
143  cfg->ss_count = ss_count;
144  cfg->searches_per_step = 8;
145}
146
/*
 * To avoid the penalty of reads that cross a cache line, preload the
 * reference area into a small buffer that is aligned so that no read from it
 * crosses a cache line. This reduces the CPU cycles spent reading reference
 * data in the sub-pixel filter functions.
 * TODO: Currently, since the sub-pixel search range here is -3 ~ 3, copy a
 * 22 rows x 32 cols area, which is enough for a 16x16 macroblock. Later, for
 * SPLITMV, we could reduce the area.
 */
156
/*
 * Estimated cost of a motion vector (r, c) relative to the reference
 * (rr, rc).
 *
 * The macro expands in place and relies on these names being in scope at the
 * use site: mvcost, mvjcost, rr, rc and error_per_bit. It evaluates to 0 when
 * mvcost is NULL. Otherwise it adds the joint cost (indexed by which of the
 * row/column components differ from the reference) to the two component
 * costs (indexed by the row/column differences), multiplies the sum by
 * error_per_bit, adds 4096 and shifts right by 13.
 *
 * r and c are each evaluated more than once.
 */
#define MVC(r, c)                                       \
    (mvcost ?                                           \
     ((mvjcost[((r) != rr) * 2 + ((c) != rc)] +         \
       mvcost[0][((r) - rr)] + mvcost[1][((c) - rc)]) * \
      error_per_bit + 4096) >> 13 : 0)
163
164
165// convert motion vector component to offset for svf calc
166static INLINE int sp(int x) {
167  return (x & 7) << 1;
168}
169
170static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) {
171  return &buf[(r >> 3) * stride + (c >> 3)];
172}
173
/*
 * Checks if (r, c) has a better score than the previous best.
 *
 * If (r, c) lies inside [minr, maxr] x [minc, maxc], the candidate error is
 * obtained from vfp->svf (or from vfp->svaf when second_pred is non-NULL),
 * the MVC() cost is added and the sum is stored in v. When v is lower than
 * besterr, the macro updates besterr, br, bc, *distortion and *sse1.
 * Candidates outside the window are not evaluated and v is set to INT_MAX.
 *
 * The macro expands to a bare if/else statement and uses many names from the
 * expansion site: minc, maxc, minr, maxr, second_pred, vfp, y, y_stride, z,
 * src_stride, sse, thismse, besterr, br, bc, distortion and sse1. The r and
 * c arguments are substituted without parentheses and are evaluated several
 * times, so they must be free of side effects.
 */
#define CHECK_BETTER(v, r, c) \
  if (c >= minc && c <= maxc && r >= minr && r <= maxr) {              \
    if (second_pred == NULL)                                           \
      thismse = vfp->svf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z, \
                             src_stride, &sse);                        \
    else                                                               \
      thismse = vfp->svaf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), \
                              z, src_stride, &sse, second_pred);       \
    if ((v = MVC(r, c) + thismse) < besterr) {                         \
      besterr = v;                                                     \
      br = r;                                                          \
      bc = c;                                                          \
      *distortion = thismse;                                           \
      *sse1 = sse;                                                     \
    }                                                                  \
  } else {                                                             \
    v = INT_MAX;                                                       \
  }
193
/*
 * First refinement level around the current center (tr, tc) at distance
 * hstep.
 *
 * Evaluates the four axis neighbours with CHECK_BETTER, storing their scores
 * in left (tc - hstep), right (tc + hstep), up (tr - hstep) and
 * down (tr + hstep). whichdir then encodes the cheaper side on each axis
 * (bit 0: 0 = left, 1 = right; bit 1: 0 = up, 2 = down), and the single
 * diagonal neighbour in that quadrant is evaluated as well.
 *
 * Uses tr, tc, hstep and whichdir from the expansion site, plus everything
 * CHECK_BETTER needs. br/bc/besterr are updated through CHECK_BETTER.
 */
#define FIRST_LEVEL_CHECKS                              \
  {                                                     \
    unsigned int left, right, up, down, diag;           \
    CHECK_BETTER(left, tr, tc - hstep);                 \
    CHECK_BETTER(right, tr, tc + hstep);                \
    CHECK_BETTER(up, tr - hstep, tc);                   \
    CHECK_BETTER(down, tr + hstep, tc);                 \
    whichdir = (left < right ? 0 : 1) +                 \
               (up < down ? 0 : 2);                     \
    switch (whichdir) {                                 \
      case 0:                                           \
        CHECK_BETTER(diag, tr - hstep, tc - hstep);     \
        break;                                          \
      case 1:                                           \
        CHECK_BETTER(diag, tr - hstep, tc + hstep);     \
        break;                                          \
      case 2:                                           \
        CHECK_BETTER(diag, tr + hstep, tc - hstep);     \
        break;                                          \
      case 3:                                           \
        CHECK_BETTER(diag, tr + hstep, tc + hstep);     \
        break;                                          \
    }                                                   \
  }
218
/*
 * Second refinement level, run after FIRST_LEVEL_CHECKS with the same
 * center (tr, tc) and step hstep.
 *
 * kr = br - tr and kc = bc - tc give the displacement of the current best
 * point from the center. Depending on which components moved:
 *   - both moved: checks (tr + kr, tc + 2 * kc) and (tr + 2 * kr, tc + kc);
 *   - only the column moved: checks (tr +/- hstep, tc + 2 * kc), then one of
 *     (tr +/- hstep, tc + kc) selected by whichdir;
 *   - only the row moved: checks (tr + 2 * kr, tc +/- hstep), then one of
 *     (tr + kr, tc +/- hstep) selected by whichdir;
 *   - neither moved: no extra points are checked.
 *
 * kr/kc are computed once, before the CHECK_BETTER calls that may change
 * br/bc. Uses tr, tc, br, bc, hstep and whichdir from the expansion site,
 * plus everything CHECK_BETTER needs.
 */
#define SECOND_LEVEL_CHECKS                             \
  {                                                     \
    int kr, kc;                                         \
    unsigned int second;                                \
    if (tr != br && tc != bc) {                         \
      kr = br - tr;                                     \
      kc = bc - tc;                                     \
      CHECK_BETTER(second, tr + kr, tc + 2 * kc);       \
      CHECK_BETTER(second, tr + 2 * kr, tc + kc);       \
    } else if (tr == br && tc != bc) {                  \
      kc = bc - tc;                                     \
      CHECK_BETTER(second, tr + hstep, tc + 2 * kc);    \
      CHECK_BETTER(second, tr - hstep, tc + 2 * kc);    \
      switch (whichdir) {                               \
        case 0:                                         \
        case 1:                                         \
          CHECK_BETTER(second, tr + hstep, tc + kc);    \
          break;                                        \
        case 2:                                         \
        case 3:                                         \
          CHECK_BETTER(second, tr - hstep, tc + kc);    \
          break;                                        \
      }                                                 \
    } else if (tr != br && tc == bc) {                  \
      kr = br - tr;                                     \
      CHECK_BETTER(second, tr + 2 * kr, tc + hstep);    \
      CHECK_BETTER(second, tr + 2 * kr, tc - hstep);    \
      switch (whichdir) {                               \
        case 0:                                         \
        case 2:                                         \
          CHECK_BETTER(second, tr + kr, tc + hstep);    \
          break;                                        \
        case 1:                                         \
        case 3:                                         \
          CHECK_BETTER(second, tr + kr, tc - hstep);    \
          break;                                        \
      }                                                 \
    }                                                   \
  }
258
// Sub-pixel refinement of a motion vector.
//
// Scores the starting point, then runs up to three refinement rounds with
// hstep = 4, 2 and 1. Each round runs FIRST_LEVEL_CHECKS and, when
// iters_per_step > 1, SECOND_LEVEL_CHECKS, and then re-centers on the best
// point found so far.
//
//   x              - macroblock; the plane-0 source, the plane-0 prediction
//                    buffer pre[0] and the mv_{row,col}_{min,max} limits are
//                    read from it.
//   bestmv         - in: starting vector (row/col are multiplied by 8 here);
//                    out: best vector found.
//   ref_mv         - reference vector for the MV cost and the range limits.
//   allow_hp       - the hstep = 1 round runs only when this is non-zero,
//                    vp9_use_mv_hp(ref_mv) is non-zero and forced_stop == 0.
//   error_per_bit  - multiplier applied to the MV cost.
//   vfp            - function table; vf, svf and svaf are used.
//   forced_stop    - 0: full, 1: qtr only, 2: half only.
//   iters_per_step - second-level checks run when this is greater than 1.
//   mvjcost/mvcost - MV cost tables; the MV cost is 0 when mvcost is NULL.
//   distortion     - out: error of the best point, without the MV cost.
//   sse1           - out: SSE reported for the best point.
//   second_pred    - optional second predictor; when non-NULL the averaging
//                    paths (vp9_comp_avg_pred / svaf) are used.
//   w, h           - block dimensions handed to vp9_comp_avg_pred.
//
// Returns the best error including the MV cost, or INT_MAX when the final
// vector is more than (MAX_FULL_PEL_VAL << 3) away from ref_mv in either
// component.
int vp9_find_best_sub_pixel_tree(const MACROBLOCK *x,
                                 MV *bestmv, const MV *ref_mv,
                                 int allow_hp,
                                 int error_per_bit,
                                 const vp9_variance_fn_ptr_t *vfp,
                                 int forced_stop,
                                 int iters_per_step,
                                 int *mvjcost, int *mvcost[2],
                                 int *distortion,
                                 unsigned int *sse1,
                                 const uint8_t *second_pred,
                                 int w, int h) {
  // Many of the locals below are not referenced directly in this body; they
  // are picked up by name inside the MVC / CHECK_BETTER / *_LEVEL_CHECKS
  // macro expansions.
  const uint8_t *const z = x->plane[0].src.buf;
  const int src_stride = x->plane[0].src.stride;
  const MACROBLOCKD *xd = &x->e_mbd;
  unsigned int besterr = INT_MAX;
  unsigned int sse;
  unsigned int whichdir;
  int thismse;
  const unsigned int halfiters = iters_per_step;
  const unsigned int quarteriters = iters_per_step;
  const unsigned int eighthiters = iters_per_step;

  const int y_stride = xd->plane[0].pre[0].stride;
  // Computed from bestmv before it is scaled by 8 below.
  const int offset = bestmv->row * y_stride + bestmv->col;
  const uint8_t *const y = xd->plane[0].pre[0].buf;

  int rr = ref_mv->row;
  int rc = ref_mv->col;
  int br = bestmv->row * 8;
  int bc = bestmv->col * 8;
  int hstep = 4;
  // Candidate window checked by CHECK_BETTER: the macroblock limits scaled
  // by 8, intersected with ref_mv +/- MV_MAX.
  const int minc = MAX(x->mv_col_min * 8, ref_mv->col - MV_MAX);
  const int maxc = MIN(x->mv_col_max * 8, ref_mv->col + MV_MAX);
  const int minr = MAX(x->mv_row_min * 8, ref_mv->row - MV_MAX);
  const int maxr = MIN(x->mv_row_max * 8, ref_mv->row + MV_MAX);

  int tr = br;
  int tc = bc;

  // central mv
  bestmv->row *= 8;
  bestmv->col *= 8;

  // calculate central point error
  // TODO(yunqingwang): central point error was already calculated in full-
  // pixel search, and can be passed in this function.
  if (second_pred != NULL) {
    DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64);
    vp9_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride);
    besterr = vfp->vf(comp_pred, w, z, src_stride, sse1);
  } else {
    besterr = vfp->vf(y + offset, y_stride, z, src_stride, sse1);
  }
  *distortion = besterr;
  besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);

  // Each subsequent iteration checks at least one point in
  // common with the last iteration could be 2 ( if diag selected)
  // 1/2 pel
  FIRST_LEVEL_CHECKS;
  if (halfiters > 1) {
    SECOND_LEVEL_CHECKS;
  }
  // Re-center on the best point before the next, finer round.
  tr = br;
  tc = bc;

  // Each subsequent iteration checks at least one point in common with
  // the last iteration could be 2 ( if diag selected) 1/4 pel

  // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
  if (forced_stop != 2) {
    hstep >>= 1;
    FIRST_LEVEL_CHECKS;
    if (quarteriters > 1) {
      SECOND_LEVEL_CHECKS;
    }
    tr = br;
    tc = bc;
  }

  if (allow_hp && vp9_use_mv_hp(ref_mv) && forced_stop == 0) {
    hstep >>= 1;
    FIRST_LEVEL_CHECKS;
    if (eighthiters > 1) {
      SECOND_LEVEL_CHECKS;
    }
    tr = br;
    tc = bc;
  }
  // These lines ensure static analysis doesn't warn that
  // tr and tc aren't used after the above point.
  (void) tr;
  (void) tc;

  bestmv->row = br;
  bestmv->col = bc;

  // Reject results that ended up too far from the reference vector.
  if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) ||
      (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3)))
    return INT_MAX;

  return besterr;
}
363
// Retire the sub-pixel search helper macros; CHECK_BETTER is redefined
// further down in a different form for the full-pixel pattern search.
// NOTE(review): no PRE macro is defined in the visible part of this file
// (pre() above is an inline function), so "#undef PRE" looks stale, and
// FIRST_LEVEL_CHECKS / SECOND_LEVEL_CHECKS are left defined -- confirm.
#undef MVC
#undef PRE
#undef CHECK_BETTER
367
368static INLINE int check_bounds(const MACROBLOCK *x, int row, int col,
369                               int range) {
370  return ((row - range) >= x->mv_row_min) &
371         ((row + range) <= x->mv_row_max) &
372         ((col - range) >= x->mv_col_min) &
373         ((col + range) <= x->mv_col_max);
374}
375
376static INLINE int is_mv_in(const MACROBLOCK *x, const MV *mv) {
377  return (mv->col >= x->mv_col_min) && (mv->col <= x->mv_col_max) &&
378         (mv->row >= x->mv_row_min) && (mv->row <= x->mv_row_max);
379}
380
// Block-form helper for the full-pixel pattern search. Compares the
// candidate SAD in thissad against bestsad. The MV cost (mvsad_err_cost) is
// added only when use_mvcost is set and only after the raw SAD has already
// beaten bestsad; if the total still beats it, bestsad is updated and
// best_site is set to the current loop index i.
// Uses thissad, bestsad, use_mvcost, x, this_mv, fcenter_mv, sad_per_bit,
// best_site and i from the expansion site. The expansion is a complete
// brace-enclosed block, so it is used without a trailing semicolon.
#define CHECK_BETTER \
  {\
    if (thissad < bestsad) {\
      if (use_mvcost) \
        thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);\
      if (thissad < bestsad) {\
        bestsad = thissad;\
        best_site = i;\
      }\
    }\
  }
392
// Sizing constants for the pattern search below: the two dimensions of the
// candidate tables and the number of candidates re-checked per refinement
// iteration.
#define MAX_PATTERN_SCALES         11
#define MAX_PATTERN_CANDIDATES      8  // max number of candidates per scale
#define PATTERN_CANDIDATES_REF      3  // number of refinement candidates
396
// Generic pattern search function that searches over multiple scales.
// Each scale can have a different number of candidates and shape of
// candidates as indicated in the num_candidates and candidates arrays
// passed into this function
//
//   x              - macroblock; the plane-0 source, the plane-0 prediction
//                    buffer pre[0] and the MV limits are read from it.
//   ref_mv         - starting vector; clamped in place to the MV limits of x.
//   search_param   - index into search_param_to_steps, selecting the largest
//                    scale to use (scale = 10 - search_param).
//   sad_per_bit    - multiplier passed to mvsad_err_cost.
//   do_init_search - when non-zero, first probe every scale from 0 up to the
//                    selected one around the start point and continue from
//                    the scale of the best candidate.
//   do_refine      - when non-zero, finish with a 4-neighbour refinement.
//   vfp            - function table; only sdf is used here.
//   use_mvcost     - when non-zero, candidates are charged the MV cost (see
//                    CHECK_BETTER).
//   center_mv      - reference for the MV cost; each component is shifted
//                    right by 3 before use.
//   best_mv        - out: best vector found.
//   num_candidates - number of valid entries in candidates[] for each scale.
//   candidates     - per-scale offsets relative to the current center.
//
// Returns the best SAD-based score found (bestsad).
static int vp9_pattern_search(const MACROBLOCK *x,
                              MV *ref_mv,
                              int search_param,
                              int sad_per_bit,
                              int do_init_search, int do_refine,
                              const vp9_variance_fn_ptr_t *vfp,
                              int use_mvcost,
                              const MV *center_mv, MV *best_mv,
                              const int num_candidates[MAX_PATTERN_SCALES],
                              const MV candidates[MAX_PATTERN_SCALES]
                                                 [MAX_PATTERN_CANDIDATES]) {
  const MACROBLOCKD *const xd = &x->e_mbd;
  // Maps search_param to the starting (largest) scale index.
  static const int search_param_to_steps[MAX_MVSEARCH_STEPS] = {
    10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
  };
  int i, j, s, t;
  const struct buf_2d *const what = &x->plane[0].src;
  const struct buf_2d *const in_what = &xd->plane[0].pre[0];
  int br, bc;
  int bestsad = INT_MAX;
  int thissad;
  // Index, within its scale, of the candidate that was last selected.
  int k = -1;
  const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
  int best_init_s = search_param_to_steps[search_param];
  // adjust ref_mv to make sure it is within MV range
  clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
  br = ref_mv->row;
  bc = ref_mv->col;

  // Work out the start point for the search
  // Note: the MV cost is added here regardless of use_mvcost.
  bestsad = vfp->sdf(what->buf, what->stride,
                     get_buf_from_mv(in_what, ref_mv), in_what->stride) +
      mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit);

  // Search all possible scales up to the search param around the center point
  // pick the scale of the point that is best as the starting scale of
  // further steps around it.
  if (do_init_search) {
    s = best_init_s;
    best_init_s = -1;
    for (t = 0; t <= s; ++t) {
      int best_site = -1;
      // When the whole pattern fits inside the MV limits, the per-candidate
      // range test can be skipped.
      if (check_bounds(x, br, bc, 1 << t)) {
        for (i = 0; i < num_candidates[t]; i++) {
          const MV this_mv = {br + candidates[t][i].row,
                              bc + candidates[t][i].col};
          thissad = vfp->sdf(what->buf, what->stride,
                             get_buf_from_mv(in_what, &this_mv),
                             in_what->stride);
          CHECK_BETTER
        }
      } else {
        for (i = 0; i < num_candidates[t]; i++) {
          const MV this_mv = {br + candidates[t][i].row,
                              bc + candidates[t][i].col};
          if (!is_mv_in(x, &this_mv))
            continue;
          thissad = vfp->sdf(what->buf, what->stride,
                             get_buf_from_mv(in_what, &this_mv),
                             in_what->stride);
          CHECK_BETTER
        }
      }
      // bestsad carries over between scales, so a later scale only replaces
      // the choice when one of its candidates beats every earlier one.
      if (best_site == -1) {
        continue;
      } else {
        best_init_s = t;
        k = best_site;
      }
    }
    if (best_init_s != -1) {
      br += candidates[best_init_s][k].row;
      bc += candidates[best_init_s][k].col;
    }
  }

  // If the center point is still the best, just skip this and move to
  // the refinement step.
  if (best_init_s != -1) {
    int best_site = -1;
    s = best_init_s;

    do {
      // No need to search all 6 points the 1st time if initial search was used
      if (!do_init_search || s != best_init_s) {
        if (check_bounds(x, br, bc, 1 << s)) {
          for (i = 0; i < num_candidates[s]; i++) {
            const MV this_mv = {br + candidates[s][i].row,
                                bc + candidates[s][i].col};
            thissad = vfp->sdf(what->buf, what->stride,
                               get_buf_from_mv(in_what, &this_mv),
                               in_what->stride);
            CHECK_BETTER
          }
        } else {
          for (i = 0; i < num_candidates[s]; i++) {
            const MV this_mv = {br + candidates[s][i].row,
                                bc + candidates[s][i].col};
            if (!is_mv_in(x, &this_mv))
              continue;
            thissad = vfp->sdf(what->buf, what->stride,
                               get_buf_from_mv(in_what, &this_mv),
                               in_what->stride);
            CHECK_BETTER
          }
        }

        if (best_site == -1) {
          // Nothing better at this scale: `continue` jumps to the
          // `while (s--)` test, i.e. on to the next smaller scale.
          continue;
        } else {
          br += candidates[s][best_site].row;
          bc += candidates[s][best_site].col;
          k = best_site;
        }
      }

      // Keep stepping at this scale, re-checking only the last winning
      // direction k and its two neighbours in the candidate list (with
      // wrap-around), until no candidate improves on the center.
      do {
        int next_chkpts_indices[PATTERN_CANDIDATES_REF];
        best_site = -1;
        next_chkpts_indices[0] = (k == 0) ? num_candidates[s] - 1 : k - 1;
        next_chkpts_indices[1] = k;
        next_chkpts_indices[2] = (k == num_candidates[s] - 1) ? 0 : k + 1;

        if (check_bounds(x, br, bc, 1 << s)) {
          for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
            const MV this_mv = {br + candidates[s][next_chkpts_indices[i]].row,
                                bc + candidates[s][next_chkpts_indices[i]].col};
            thissad = vfp->sdf(what->buf, what->stride,
                               get_buf_from_mv(in_what, &this_mv),
                               in_what->stride);
            CHECK_BETTER
          }
        } else {
          for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
            const MV this_mv = {br + candidates[s][next_chkpts_indices[i]].row,
                                bc + candidates[s][next_chkpts_indices[i]].col};
            if (!is_mv_in(x, &this_mv))
              continue;
            thissad = vfp->sdf(what->buf, what->stride,
                               get_buf_from_mv(in_what, &this_mv),
                               in_what->stride);
            CHECK_BETTER
          }
        }

        if (best_site != -1) {
          // best_site indexes next_chkpts_indices here, not candidates[s].
          k = next_chkpts_indices[best_site];
          br += candidates[s][k].row;
          bc += candidates[s][k].col;
        }
      } while (best_site != -1);
    } while (s--);
  }

  // Check 4 1-away neighbors if do_refine is true.
  // For most well-designed schemes do_refine will not be necessary.
  if (do_refine) {
    static const MV neighbors[4] = {{0, -1}, { -1, 0}, {1, 0}, {0, 1}};

    // At most 16 single-step moves; stops early once no neighbor improves.
    for (j = 0; j < 16; j++) {
      int best_site = -1;
      if (check_bounds(x, br, bc, 1)) {
        for (i = 0; i < 4; i++) {
          const MV this_mv = {br + neighbors[i].row,
                              bc + neighbors[i].col};
          thissad = vfp->sdf(what->buf, what->stride,
                             get_buf_from_mv(in_what, &this_mv),
                             in_what->stride);
          CHECK_BETTER
        }
      } else {
        for (i = 0; i < 4; i++) {
          const MV this_mv = {br + neighbors[i].row,
                              bc + neighbors[i].col};
          if (!is_mv_in(x, &this_mv))
            continue;
          thissad = vfp->sdf(what->buf, what->stride,
                             get_buf_from_mv(in_what, &this_mv),
                             in_what->stride);
          CHECK_BETTER
        }
      }

      if (best_site == -1) {
        break;
      } else {
        br += neighbors[best_site].row;
        bc += neighbors[best_site].col;
      }
    }
  }

  best_mv->row = br;
  best_mv->col = bc;

  return bestsad;
}
598
599int vp9_get_mvpred_var(const MACROBLOCK *x,
600                       const MV *best_mv, const MV *center_mv,
601                       const vp9_variance_fn_ptr_t *vfp,
602                       int use_mvcost) {
603  const MACROBLOCKD *const xd = &x->e_mbd;
604  const struct buf_2d *const what = &x->plane[0].src;
605  const struct buf_2d *const in_what = &xd->plane[0].pre[0];
606  const MV mv = {best_mv->row * 8, best_mv->col * 8};
607  unsigned int unused;
608
609  return vfp->vf(what->buf, what->stride,
610                 get_buf_from_mv(in_what, best_mv), in_what->stride, &unused) +
611      (use_mvcost ?  mv_err_cost(&mv, center_mv, x->nmvjointcost,
612                                 x->mvcost, x->errorperbit) : 0);
613}
614
615int vp9_get_mvpred_av_var(const MACROBLOCK *x,
616                          const MV *best_mv, const MV *center_mv,
617                          const uint8_t *second_pred,
618                          const vp9_variance_fn_ptr_t *vfp,
619                          int use_mvcost) {
620  const MACROBLOCKD *const xd = &x->e_mbd;
621  const struct buf_2d *const what = &x->plane[0].src;
622  const struct buf_2d *const in_what = &xd->plane[0].pre[0];
623  const MV mv = {best_mv->row * 8, best_mv->col * 8};
624  unsigned int unused;
625
626  return vfp->svaf(get_buf_from_mv(in_what, best_mv), in_what->stride, 0, 0,
627                   what->buf, what->stride, &unused, second_pred) +
628      (use_mvcost ?  mv_err_cost(&mv, center_mv, x->nmvjointcost,
629                                 x->mvcost, x->errorperbit) : 0);
630}
631
632int vp9_hex_search(const MACROBLOCK *x,
633                   MV *ref_mv,
634                   int search_param,
635                   int sad_per_bit,
636                   int do_init_search,
637                   const vp9_variance_fn_ptr_t *vfp,
638                   int use_mvcost,
639                   const MV *center_mv, MV *best_mv) {
640  // First scale has 8-closest points, the rest have 6 points in hex shape
641  // at increasing scales
642  static const int hex_num_candidates[MAX_PATTERN_SCALES] = {
643    8, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6
644  };
645  // Note that the largest candidate step at each scale is 2^scale
646  static const MV hex_candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES] = {
647    {{-1, -1}, {0, -1}, {1, -1}, {1, 0}, {1, 1}, { 0, 1}, { -1, 1}, {-1, 0}},
648    {{-1, -2}, {1, -2}, {2, 0}, {1, 2}, { -1, 2}, { -2, 0}},
649    {{-2, -4}, {2, -4}, {4, 0}, {2, 4}, { -2, 4}, { -4, 0}},
650    {{-4, -8}, {4, -8}, {8, 0}, {4, 8}, { -4, 8}, { -8, 0}},
651    {{-8, -16}, {8, -16}, {16, 0}, {8, 16}, { -8, 16}, { -16, 0}},
652    {{-16, -32}, {16, -32}, {32, 0}, {16, 32}, { -16, 32}, { -32, 0}},
653    {{-32, -64}, {32, -64}, {64, 0}, {32, 64}, { -32, 64}, { -64, 0}},
654    {{-64, -128}, {64, -128}, {128, 0}, {64, 128}, { -64, 128}, { -128, 0}},
655    {{-128, -256}, {128, -256}, {256, 0}, {128, 256}, { -128, 256}, { -256, 0}},
656    {{-256, -512}, {256, -512}, {512, 0}, {256, 512}, { -256, 512}, { -512, 0}},
657    {{-512, -1024}, {512, -1024}, {1024, 0}, {512, 1024}, { -512, 1024},
658      { -1024, 0}},
659  };
660  return vp9_pattern_search(x, ref_mv, search_param, sad_per_bit,
661                            do_init_search, 0, vfp, use_mvcost,
662                            center_mv, best_mv,
663                            hex_num_candidates, hex_candidates);
664}
665
666int vp9_bigdia_search(const MACROBLOCK *x,
667                      MV *ref_mv,
668                      int search_param,
669                      int sad_per_bit,
670                      int do_init_search,
671                      const vp9_variance_fn_ptr_t *vfp,
672                      int use_mvcost,
673                      const MV *center_mv,
674                      MV *best_mv) {
675  // First scale has 4-closest points, the rest have 8 points in diamond
676  // shape at increasing scales
677  static const int bigdia_num_candidates[MAX_PATTERN_SCALES] = {
678    4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
679  };
680  // Note that the largest candidate step at each scale is 2^scale
681  static const MV bigdia_candidates[MAX_PATTERN_SCALES]
682                                   [MAX_PATTERN_CANDIDATES] = {
683    {{0, -1}, {1, 0}, { 0, 1}, {-1, 0}},
684    {{-1, -1}, {0, -2}, {1, -1}, {2, 0}, {1, 1}, {0, 2}, {-1, 1}, {-2, 0}},
685    {{-2, -2}, {0, -4}, {2, -2}, {4, 0}, {2, 2}, {0, 4}, {-2, 2}, {-4, 0}},
686    {{-4, -4}, {0, -8}, {4, -4}, {8, 0}, {4, 4}, {0, 8}, {-4, 4}, {-8, 0}},
687    {{-8, -8}, {0, -16}, {8, -8}, {16, 0}, {8, 8}, {0, 16}, {-8, 8}, {-16, 0}},
688    {{-16, -16}, {0, -32}, {16, -16}, {32, 0}, {16, 16}, {0, 32},
689      {-16, 16}, {-32, 0}},
690    {{-32, -32}, {0, -64}, {32, -32}, {64, 0}, {32, 32}, {0, 64},
691      {-32, 32}, {-64, 0}},
692    {{-64, -64}, {0, -128}, {64, -64}, {128, 0}, {64, 64}, {0, 128},
693      {-64, 64}, {-128, 0}},
694    {{-128, -128}, {0, -256}, {128, -128}, {256, 0}, {128, 128}, {0, 256},
695      {-128, 128}, {-256, 0}},
696    {{-256, -256}, {0, -512}, {256, -256}, {512, 0}, {256, 256}, {0, 512},
697      {-256, 256}, {-512, 0}},
698    {{-512, -512}, {0, -1024}, {512, -512}, {1024, 0}, {512, 512}, {0, 1024},
699      {-512, 512}, {-1024, 0}},
700  };
701  return vp9_pattern_search(x, ref_mv, search_param, sad_per_bit,
702                            do_init_search, 0, vfp, use_mvcost,
703                            center_mv, best_mv,
704                            bigdia_num_candidates, bigdia_candidates);
705}
706
707int vp9_square_search(const MACROBLOCK *x,
708                      MV *ref_mv,
709                      int search_param,
710                      int sad_per_bit,
711                      int do_init_search,
712                      const vp9_variance_fn_ptr_t *vfp,
713                      int use_mvcost,
714                      const MV *center_mv,
715                      MV *best_mv) {
716  // All scales have 8 closest points in square shape
717  static const int square_num_candidates[MAX_PATTERN_SCALES] = {
718    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
719  };
720  // Note that the largest candidate step at each scale is 2^scale
721  static const MV square_candidates[MAX_PATTERN_SCALES]
722                                   [MAX_PATTERN_CANDIDATES] = {
723    {{-1, -1}, {0, -1}, {1, -1}, {1, 0}, {1, 1}, {0, 1}, {-1, 1}, {-1, 0}},
724    {{-2, -2}, {0, -2}, {2, -2}, {2, 0}, {2, 2}, {0, 2}, {-2, 2}, {-2, 0}},
725    {{-4, -4}, {0, -4}, {4, -4}, {4, 0}, {4, 4}, {0, 4}, {-4, 4}, {-4, 0}},
726    {{-8, -8}, {0, -8}, {8, -8}, {8, 0}, {8, 8}, {0, 8}, {-8, 8}, {-8, 0}},
727    {{-16, -16}, {0, -16}, {16, -16}, {16, 0}, {16, 16}, {0, 16},
728      {-16, 16}, {-16, 0}},
729    {{-32, -32}, {0, -32}, {32, -32}, {32, 0}, {32, 32}, {0, 32},
730      {-32, 32}, {-32, 0}},
731    {{-64, -64}, {0, -64}, {64, -64}, {64, 0}, {64, 64}, {0, 64},
732      {-64, 64}, {-64, 0}},
733    {{-128, -128}, {0, -128}, {128, -128}, {128, 0}, {128, 128}, {0, 128},
734      {-128, 128}, {-128, 0}},
735    {{-256, -256}, {0, -256}, {256, -256}, {256, 0}, {256, 256}, {0, 256},
736      {-256, 256}, {-256, 0}},
737    {{-512, -512}, {0, -512}, {512, -512}, {512, 0}, {512, 512}, {0, 512},
738      {-512, 512}, {-512, 0}},
739    {{-1024, -1024}, {0, -1024}, {1024, -1024}, {1024, 0}, {1024, 1024},
740      {0, 1024}, {-1024, 1024}, {-1024, 0}},
741  };
742  return vp9_pattern_search(x, ref_mv, search_param, sad_per_bit,
743                            do_init_search, 0, vfp, use_mvcost,
744                            center_mv, best_mv,
745                            square_num_candidates, square_candidates);
746}
747
748int vp9_fast_hex_search(const MACROBLOCK *x,
749                        MV *ref_mv,
750                        int search_param,
751                        int sad_per_bit,
752                        int do_init_search,  // must be zero for fast_hex
753                        const vp9_variance_fn_ptr_t *vfp,
754                        int use_mvcost,
755                        const MV *center_mv,
756                        MV *best_mv) {
757  return vp9_hex_search(x, ref_mv, MAX(MAX_MVSEARCH_STEPS - 2, search_param),
758                        sad_per_bit, do_init_search, vfp, use_mvcost,
759                        center_mv, best_mv);
760}
761
762int vp9_fast_dia_search(const MACROBLOCK *x,
763                        MV *ref_mv,
764                        int search_param,
765                        int sad_per_bit,
766                        int do_init_search,
767                        const vp9_variance_fn_ptr_t *vfp,
768                        int use_mvcost,
769                        const MV *center_mv,
770                        MV *best_mv) {
771  return vp9_bigdia_search(x, ref_mv, MAX(MAX_MVSEARCH_STEPS - 2, search_param),
772                           sad_per_bit, do_init_search, vfp, use_mvcost,
773                           center_mv, best_mv);
774}
775
776#undef CHECK_BETTER
777
778int vp9_full_range_search_c(const MACROBLOCK *x,
779                            const search_site_config *cfg,
780                            MV *ref_mv, MV *best_mv,
781                            int search_param, int sad_per_bit, int *num00,
782                            const vp9_variance_fn_ptr_t *fn_ptr,
783                            const MV *center_mv) {
784  const MACROBLOCKD *const xd = &x->e_mbd;
785  const struct buf_2d *const what = &x->plane[0].src;
786  const struct buf_2d *const in_what = &xd->plane[0].pre[0];
787  const int range = 64;
788  const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
789  unsigned int best_sad = INT_MAX;
790  int r, c, i;
791  int start_col, end_col, start_row, end_row;
792
793  // The cfg and search_param parameters are not used in this search variant
794  (void)cfg;
795  (void)search_param;
796
797  clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
798  *best_mv = *ref_mv;
799  *num00 = 11;
800  best_sad = fn_ptr->sdf(what->buf, what->stride,
801                         get_buf_from_mv(in_what, ref_mv), in_what->stride) +
802                 mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit);
803  start_row = MAX(-range, x->mv_row_min - ref_mv->row);
804  start_col = MAX(-range, x->mv_col_min - ref_mv->col);
805  end_row = MIN(range, x->mv_row_max - ref_mv->row);
806  end_col = MIN(range, x->mv_col_max - ref_mv->col);
807
808  for (r = start_row; r <= end_row; ++r) {
809    for (c = start_col; c <= end_col; c += 4) {
810      if (c + 3 <= end_col) {
811        unsigned int sads[4];
812        const uint8_t *addrs[4];
813        for (i = 0; i < 4; ++i) {
814          const MV mv = {ref_mv->row + r, ref_mv->col + c + i};
815          addrs[i] = get_buf_from_mv(in_what, &mv);
816        }
817
818        fn_ptr->sdx4df(what->buf, what->stride, addrs, in_what->stride, sads);
819
820        for (i = 0; i < 4; ++i) {
821          if (sads[i] < best_sad) {
822            const MV mv = {ref_mv->row + r, ref_mv->col + c + i};
823            const unsigned int sad = sads[i] +
824                mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
825            if (sad < best_sad) {
826              best_sad = sad;
827              *best_mv = mv;
828            }
829          }
830        }
831      } else {
832        for (i = 0; i < end_col - c; ++i) {
833          const MV mv = {ref_mv->row + r, ref_mv->col + c + i};
834          unsigned int sad = fn_ptr->sdf(what->buf, what->stride,
835              get_buf_from_mv(in_what, &mv), in_what->stride);
836          if (sad < best_sad) {
837            sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
838            if (sad < best_sad) {
839              best_sad = sad;
840              *best_mv = mv;
841            }
842          }
843        }
844      }
845    }
846  }
847
848  return best_sad;
849}
850
851int vp9_diamond_search_sad_c(const MACROBLOCK *x,
852                             const search_site_config *cfg,
853                             MV *ref_mv, MV *best_mv, int search_param,
854                             int sad_per_bit, int *num00,
855                             const vp9_variance_fn_ptr_t *fn_ptr,
856                             const MV *center_mv) {
857  int i, j, step;
858
859  const MACROBLOCKD *const xd = &x->e_mbd;
860  uint8_t *what = x->plane[0].src.buf;
861  const int what_stride = x->plane[0].src.stride;
862  const uint8_t *in_what;
863  const int in_what_stride = xd->plane[0].pre[0].stride;
864  const uint8_t *best_address;
865
866  unsigned int bestsad = INT_MAX;
867  int best_site = 0;
868  int last_site = 0;
869
870  int ref_row;
871  int ref_col;
872
873  // search_param determines the length of the initial step and hence the number
874  // of iterations.
875  // 0 = initial step (MAX_FIRST_STEP) pel
876  // 1 = (MAX_FIRST_STEP/2) pel,
877  // 2 = (MAX_FIRST_STEP/4) pel...
878  const search_site *ss = &cfg->ss[search_param * cfg->searches_per_step];
879  const int tot_steps = (cfg->ss_count / cfg->searches_per_step) - search_param;
880
881  const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
882  clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
883  ref_row = ref_mv->row;
884  ref_col = ref_mv->col;
885  *num00 = 0;
886  best_mv->row = ref_row;
887  best_mv->col = ref_col;
888
889  // Work out the start point for the search
890  in_what = xd->plane[0].pre[0].buf + ref_row * in_what_stride + ref_col;
891  best_address = in_what;
892
893  // Check the starting position
894  bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride)
895                + mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit);
896
897  i = 1;
898
899  for (step = 0; step < tot_steps; step++) {
900    int all_in = 1, t;
901
902    // All_in is true if every one of the points we are checking are within
903    // the bounds of the image.
904    all_in &= ((best_mv->row + ss[i].mv.row) > x->mv_row_min);
905    all_in &= ((best_mv->row + ss[i + 1].mv.row) < x->mv_row_max);
906    all_in &= ((best_mv->col + ss[i + 2].mv.col) > x->mv_col_min);
907    all_in &= ((best_mv->col + ss[i + 3].mv.col) < x->mv_col_max);
908
909    // If all the pixels are within the bounds we don't check whether the
910    // search point is valid in this loop,  otherwise we check each point
911    // for validity..
912    if (all_in) {
913      unsigned int sad_array[4];
914
915      for (j = 0; j < cfg->searches_per_step; j += 4) {
916        unsigned char const *block_offset[4];
917
918        for (t = 0; t < 4; t++)
919          block_offset[t] = ss[i + t].offset + best_address;
920
921        fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride,
922                       sad_array);
923
924        for (t = 0; t < 4; t++, i++) {
925          if (sad_array[t] < bestsad) {
926            const MV this_mv = {best_mv->row + ss[i].mv.row,
927                                best_mv->col + ss[i].mv.col};
928            sad_array[t] += mvsad_err_cost(x, &this_mv, &fcenter_mv,
929                                           sad_per_bit);
930            if (sad_array[t] < bestsad) {
931              bestsad = sad_array[t];
932              best_site = i;
933            }
934          }
935        }
936      }
937    } else {
938      for (j = 0; j < cfg->searches_per_step; j++) {
939        // Trap illegal vectors
940        const MV this_mv = {best_mv->row + ss[i].mv.row,
941                            best_mv->col + ss[i].mv.col};
942
943        if (is_mv_in(x, &this_mv)) {
944          const uint8_t *const check_here = ss[i].offset + best_address;
945          unsigned int thissad = fn_ptr->sdf(what, what_stride, check_here,
946                                             in_what_stride);
947
948          if (thissad < bestsad) {
949            thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
950            if (thissad < bestsad) {
951              bestsad = thissad;
952              best_site = i;
953            }
954          }
955        }
956        i++;
957      }
958    }
959    if (best_site != last_site) {
960      best_mv->row += ss[best_site].mv.row;
961      best_mv->col += ss[best_site].mv.col;
962      best_address += ss[best_site].offset;
963      last_site = best_site;
964#if defined(NEW_DIAMOND_SEARCH)
965      while (1) {
966        const MV this_mv = {best_mv->row + ss[best_site].mv.row,
967                            best_mv->col + ss[best_site].mv.col};
968        if (is_mv_in(x, &this_mv)) {
969          const uint8_t *const check_here = ss[best_site].offset + best_address;
970          unsigned int thissad = fn_ptr->sdf(what, what_stride, check_here,
971                                             in_what_stride);
972          if (thissad < bestsad) {
973            thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
974            if (thissad < bestsad) {
975              bestsad = thissad;
976              best_mv->row += ss[best_site].mv.row;
977              best_mv->col += ss[best_site].mv.col;
978              best_address += ss[best_site].offset;
979              continue;
980            }
981          }
982        }
983        break;
984      };
985#endif
986    } else if (best_address == in_what) {
987      (*num00)++;
988    }
989  }
990  return bestsad;
991}
992
993/* do_refine: If last step (1-away) of n-step search doesn't pick the center
994              point as the best match, we will do a final 1-away diamond
995              refining search  */
996
997int vp9_full_pixel_diamond(const VP9_COMP *cpi, MACROBLOCK *x,
998                           MV *mvp_full, int step_param,
999                           int sadpb, int further_steps, int do_refine,
1000                           const vp9_variance_fn_ptr_t *fn_ptr,
1001                           const MV *ref_mv, MV *dst_mv) {
1002  MV temp_mv;
1003  int thissme, n, num00 = 0;
1004  int bestsme = cpi->diamond_search_sad(x, &cpi->ss_cfg, mvp_full, &temp_mv,
1005                                        step_param, sadpb, &n,
1006                                        fn_ptr, ref_mv);
1007  if (bestsme < INT_MAX)
1008    bestsme = vp9_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1);
1009  *dst_mv = temp_mv;
1010
1011  // If there won't be more n-step search, check to see if refining search is
1012  // needed.
1013  if (n > further_steps)
1014    do_refine = 0;
1015
1016  while (n < further_steps) {
1017    ++n;
1018
1019    if (num00) {
1020      num00--;
1021    } else {
1022      thissme = cpi->diamond_search_sad(x, &cpi->ss_cfg, mvp_full, &temp_mv,
1023                                        step_param + n, sadpb, &num00,
1024                                        fn_ptr, ref_mv);
1025      if (thissme < INT_MAX)
1026        thissme = vp9_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1);
1027
1028      // check to see if refining search is needed.
1029      if (num00 > further_steps - n)
1030        do_refine = 0;
1031
1032      if (thissme < bestsme) {
1033        bestsme = thissme;
1034        *dst_mv = temp_mv;
1035      }
1036    }
1037  }
1038
1039  // final 1-away diamond refining search
1040  if (do_refine) {
1041    const int search_range = 8;
1042    MV best_mv = *dst_mv;
1043    thissme = cpi->refining_search_sad(x, &best_mv, sadpb, search_range,
1044                                       fn_ptr, ref_mv);
1045    if (thissme < INT_MAX)
1046      thissme = vp9_get_mvpred_var(x, &best_mv, ref_mv, fn_ptr, 1);
1047    if (thissme < bestsme) {
1048      bestsme = thissme;
1049      *dst_mv = best_mv;
1050    }
1051  }
1052  return bestsme;
1053}
1054
1055int vp9_full_search_sad_c(const MACROBLOCK *x, const MV *ref_mv,
1056                          int sad_per_bit, int distance,
1057                          const vp9_variance_fn_ptr_t *fn_ptr,
1058                          const MV *center_mv, MV *best_mv) {
1059  int r, c;
1060  const MACROBLOCKD *const xd = &x->e_mbd;
1061  const struct buf_2d *const what = &x->plane[0].src;
1062  const struct buf_2d *const in_what = &xd->plane[0].pre[0];
1063  const int row_min = MAX(ref_mv->row - distance, x->mv_row_min);
1064  const int row_max = MIN(ref_mv->row + distance, x->mv_row_max);
1065  const int col_min = MAX(ref_mv->col - distance, x->mv_col_min);
1066  const int col_max = MIN(ref_mv->col + distance, x->mv_col_max);
1067  const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
1068  int best_sad = fn_ptr->sdf(what->buf, what->stride,
1069      get_buf_from_mv(in_what, ref_mv), in_what->stride) +
1070      mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit);
1071  *best_mv = *ref_mv;
1072
1073  for (r = row_min; r < row_max; ++r) {
1074    for (c = col_min; c < col_max; ++c) {
1075      const MV mv = {r, c};
1076      const int sad = fn_ptr->sdf(what->buf, what->stride,
1077          get_buf_from_mv(in_what, &mv), in_what->stride) +
1078              mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
1079      if (sad < best_sad) {
1080        best_sad = sad;
1081        *best_mv = mv;
1082      }
1083    }
1084  }
1085  return best_sad;
1086}
1087
1088int vp9_full_search_sadx3(const MACROBLOCK *x, const MV *ref_mv,
1089                          int sad_per_bit, int distance,
1090                          const vp9_variance_fn_ptr_t *fn_ptr,
1091                          const MV *center_mv, MV *best_mv) {
1092  int r;
1093  const MACROBLOCKD *const xd = &x->e_mbd;
1094  const struct buf_2d *const what = &x->plane[0].src;
1095  const struct buf_2d *const in_what = &xd->plane[0].pre[0];
1096  const int row_min = MAX(ref_mv->row - distance, x->mv_row_min);
1097  const int row_max = MIN(ref_mv->row + distance, x->mv_row_max);
1098  const int col_min = MAX(ref_mv->col - distance, x->mv_col_min);
1099  const int col_max = MIN(ref_mv->col + distance, x->mv_col_max);
1100  const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
1101  unsigned int best_sad = fn_ptr->sdf(what->buf, what->stride,
1102      get_buf_from_mv(in_what, ref_mv), in_what->stride) +
1103      mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit);
1104  *best_mv = *ref_mv;
1105
1106  for (r = row_min; r < row_max; ++r) {
1107    int c = col_min;
1108    const uint8_t *check_here = &in_what->buf[r * in_what->stride + c];
1109
1110    if (fn_ptr->sdx3f != NULL) {
1111      while ((c + 2) < col_max) {
1112        int i;
1113        unsigned int sads[3];
1114
1115        fn_ptr->sdx3f(what->buf, what->stride, check_here, in_what->stride,
1116                      sads);
1117
1118        for (i = 0; i < 3; ++i) {
1119          unsigned int sad = sads[i];
1120          if (sad < best_sad) {
1121            const MV mv = {r, c};
1122            sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
1123            if (sad < best_sad) {
1124              best_sad = sad;
1125              *best_mv = mv;
1126            }
1127          }
1128          ++check_here;
1129          ++c;
1130        }
1131      }
1132    }
1133
1134    while (c < col_max) {
1135      unsigned int sad = fn_ptr->sdf(what->buf, what->stride,
1136                                     check_here, in_what->stride);
1137      if (sad < best_sad) {
1138        const MV mv = {r, c};
1139        sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
1140        if (sad < best_sad) {
1141          best_sad = sad;
1142          *best_mv = mv;
1143        }
1144      }
1145      ++check_here;
1146      ++c;
1147    }
1148  }
1149
1150  return best_sad;
1151}
1152
1153int vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv,
1154                          int sad_per_bit, int distance,
1155                          const vp9_variance_fn_ptr_t *fn_ptr,
1156                          const MV *center_mv, MV *best_mv) {
1157  int r;
1158  const MACROBLOCKD *const xd = &x->e_mbd;
1159  const struct buf_2d *const what = &x->plane[0].src;
1160  const struct buf_2d *const in_what = &xd->plane[0].pre[0];
1161  const int row_min = MAX(ref_mv->row - distance, x->mv_row_min);
1162  const int row_max = MIN(ref_mv->row + distance, x->mv_row_max);
1163  const int col_min = MAX(ref_mv->col - distance, x->mv_col_min);
1164  const int col_max = MIN(ref_mv->col + distance, x->mv_col_max);
1165  const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
1166  unsigned int best_sad = fn_ptr->sdf(what->buf, what->stride,
1167      get_buf_from_mv(in_what, ref_mv), in_what->stride) +
1168      mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit);
1169  *best_mv = *ref_mv;
1170
1171  for (r = row_min; r < row_max; ++r) {
1172    int c = col_min;
1173    const uint8_t *check_here = &in_what->buf[r * in_what->stride + c];
1174
1175    if (fn_ptr->sdx8f != NULL) {
1176      while ((c + 7) < col_max) {
1177        int i;
1178        unsigned int sads[8];
1179
1180        fn_ptr->sdx8f(what->buf, what->stride, check_here, in_what->stride,
1181                      sads);
1182
1183        for (i = 0; i < 8; ++i) {
1184          unsigned int sad = sads[i];
1185          if (sad < best_sad) {
1186            const MV mv = {r, c};
1187            sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
1188            if (sad < best_sad) {
1189              best_sad = sad;
1190              *best_mv = mv;
1191            }
1192          }
1193          ++check_here;
1194          ++c;
1195        }
1196      }
1197    }
1198
1199    if (fn_ptr->sdx3f != NULL) {
1200      while ((c + 2) < col_max) {
1201        int i;
1202        unsigned int sads[3];
1203
1204        fn_ptr->sdx3f(what->buf, what->stride, check_here, in_what->stride,
1205                      sads);
1206
1207        for (i = 0; i < 3; ++i) {
1208          unsigned int sad = sads[i];
1209          if (sad < best_sad) {
1210            const MV mv = {r, c};
1211            sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
1212            if (sad < best_sad) {
1213              best_sad = sad;
1214              *best_mv = mv;
1215            }
1216          }
1217          ++check_here;
1218          ++c;
1219        }
1220      }
1221    }
1222
1223    while (c < col_max) {
1224      unsigned int sad = fn_ptr->sdf(what->buf, what->stride,
1225                                     check_here, in_what->stride);
1226      if (sad < best_sad) {
1227        const MV mv = {r, c};
1228        sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
1229        if (sad < best_sad) {
1230          best_sad = sad;
1231          *best_mv = mv;
1232        }
1233      }
1234      ++check_here;
1235      ++c;
1236    }
1237  }
1238
1239  return best_sad;
1240}
1241
1242int vp9_refining_search_sad_c(const MACROBLOCK *x,
1243                              MV *ref_mv, int error_per_bit,
1244                              int search_range,
1245                              const vp9_variance_fn_ptr_t *fn_ptr,
1246                              const MV *center_mv) {
1247  const MACROBLOCKD *const xd = &x->e_mbd;
1248  const MV neighbors[4] = {{ -1, 0}, {0, -1}, {0, 1}, {1, 0}};
1249  const struct buf_2d *const what = &x->plane[0].src;
1250  const struct buf_2d *const in_what = &xd->plane[0].pre[0];
1251  const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
1252  const uint8_t *best_address = get_buf_from_mv(in_what, ref_mv);
1253  unsigned int best_sad = fn_ptr->sdf(what->buf, what->stride, best_address,
1254                                    in_what->stride) +
1255      mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit);
1256  int i, j;
1257
1258  for (i = 0; i < search_range; i++) {
1259    int best_site = -1;
1260    const int all_in = ((ref_mv->row - 1) > x->mv_row_min) &
1261                       ((ref_mv->row + 1) < x->mv_row_max) &
1262                       ((ref_mv->col - 1) > x->mv_col_min) &
1263                       ((ref_mv->col + 1) < x->mv_col_max);
1264
1265    if (all_in) {
1266      unsigned int sads[4];
1267      const uint8_t *const positions[4] = {
1268        best_address - in_what->stride,
1269        best_address - 1,
1270        best_address + 1,
1271        best_address + in_what->stride
1272      };
1273
1274      fn_ptr->sdx4df(what->buf, what->stride, positions, in_what->stride, sads);
1275
1276      for (j = 0; j < 4; ++j) {
1277        if (sads[j] < best_sad) {
1278          const MV mv = {ref_mv->row + neighbors[j].row,
1279                         ref_mv->col + neighbors[j].col};
1280          sads[j] += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit);
1281          if (sads[j] < best_sad) {
1282            best_sad = sads[j];
1283            best_site = j;
1284          }
1285        }
1286      }
1287    } else {
1288      for (j = 0; j < 4; ++j) {
1289        const MV mv = {ref_mv->row + neighbors[j].row,
1290                       ref_mv->col + neighbors[j].col};
1291
1292        if (is_mv_in(x, &mv)) {
1293          unsigned int sad = fn_ptr->sdf(what->buf, what->stride,
1294                                         get_buf_from_mv(in_what, &mv),
1295                                         in_what->stride);
1296          if (sad < best_sad) {
1297            sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit);
1298            if (sad < best_sad) {
1299              best_sad = sad;
1300              best_site = j;
1301            }
1302          }
1303        }
1304      }
1305    }
1306
1307    if (best_site == -1) {
1308      break;
1309    } else {
1310      ref_mv->row += neighbors[best_site].row;
1311      ref_mv->col += neighbors[best_site].col;
1312      best_address = get_buf_from_mv(in_what, ref_mv);
1313    }
1314  }
1315
1316  return best_sad;
1317}
1318
1319// This function is called when we do joint motion search in comp_inter_inter
1320// mode.
1321int vp9_refining_search_8p_c(const MACROBLOCK *x,
1322                             MV *ref_mv, int error_per_bit,
1323                             int search_range,
1324                             const vp9_variance_fn_ptr_t *fn_ptr,
1325                             const MV *center_mv,
1326                             const uint8_t *second_pred) {
1327  const MV neighbors[8] = {{-1, 0}, {0, -1}, {0, 1}, {1, 0},
1328                           {-1, -1}, {1, -1}, {-1, 1}, {1, 1}};
1329  const MACROBLOCKD *const xd = &x->e_mbd;
1330  const struct buf_2d *const what = &x->plane[0].src;
1331  const struct buf_2d *const in_what = &xd->plane[0].pre[0];
1332  const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
1333  unsigned int best_sad = fn_ptr->sdaf(what->buf, what->stride,
1334      get_buf_from_mv(in_what, ref_mv), in_what->stride, second_pred) +
1335      mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit);
1336  int i, j;
1337
1338  for (i = 0; i < search_range; ++i) {
1339    int best_site = -1;
1340
1341    for (j = 0; j < 8; ++j) {
1342      const MV mv = {ref_mv->row + neighbors[j].row,
1343                     ref_mv->col + neighbors[j].col};
1344
1345      if (is_mv_in(x, &mv)) {
1346        unsigned int sad = fn_ptr->sdaf(what->buf, what->stride,
1347            get_buf_from_mv(in_what, &mv), in_what->stride, second_pred);
1348        if (sad < best_sad) {
1349          sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit);
1350          if (sad < best_sad) {
1351            best_sad = sad;
1352            best_site = j;
1353          }
1354        }
1355      }
1356    }
1357
1358    if (best_site == -1) {
1359      break;
1360    } else {
1361      ref_mv->row += neighbors[best_site].row;
1362      ref_mv->col += neighbors[best_site].col;
1363    }
1364  }
1365  return best_sad;
1366}
1367
1368int vp9_full_pixel_search(VP9_COMP *cpi, MACROBLOCK *x,
1369                          BLOCK_SIZE bsize, MV *mvp_full,
1370                          int step_param, int error_per_bit,
1371                          const MV *ref_mv, MV *tmp_mv,
1372                          int var_max, int rd) {
1373  const SPEED_FEATURES *const sf = &cpi->sf;
1374  const SEARCH_METHODS method = sf->mv.search_method;
1375  vp9_variance_fn_ptr_t *fn_ptr = &cpi->fn_ptr[bsize];
1376  int var = 0;
1377
1378  switch (method) {
1379    case FAST_DIAMOND:
1380      var = vp9_fast_dia_search(x, mvp_full, step_param, error_per_bit, 0,
1381                                fn_ptr, 1, ref_mv, tmp_mv);
1382      break;
1383    case FAST_HEX:
1384      var = vp9_fast_hex_search(x, mvp_full, step_param, error_per_bit, 0,
1385                                fn_ptr, 1, ref_mv, tmp_mv);
1386      break;
1387    case HEX:
1388      var = vp9_hex_search(x, mvp_full, step_param, error_per_bit, 1,
1389                           fn_ptr, 1, ref_mv, tmp_mv);
1390      break;
1391    case SQUARE:
1392      var = vp9_square_search(x, mvp_full, step_param, error_per_bit, 1,
1393                              fn_ptr, 1, ref_mv, tmp_mv);
1394      break;
1395    case BIGDIA:
1396      var = vp9_bigdia_search(x, mvp_full, step_param, error_per_bit, 1,
1397                              fn_ptr, 1, ref_mv, tmp_mv);
1398      break;
1399    case NSTEP:
1400      var = vp9_full_pixel_diamond(cpi, x, mvp_full, step_param, error_per_bit,
1401                                   MAX_MVSEARCH_STEPS - 1 - step_param,
1402                                   1, fn_ptr, ref_mv, tmp_mv);
1403      break;
1404    default:
1405      assert(!"Invalid search method.");
1406  }
1407
1408  if (method != NSTEP && rd && var < var_max)
1409    var = vp9_get_mvpred_var(x, tmp_mv, ref_mv, fn_ptr, 1);
1410
1411  return var;
1412}
1413