1ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang/*
2ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang *
4ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang *  Use of this source code is governed by a BSD-style license
5ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang *  that can be found in the LICENSE file in the root of the source
6ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang *  tree. An additional intellectual property rights grant can be found
7ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang *  in the file PATENTS.  All contributing project authors may
8ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang *  be found in the AUTHORS file in the root of the source tree.
9ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang */
10ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
11ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang#include <limits.h>
12ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang#include <math.h>
131184aebb761cbeac9124c37189a80a1a58f04b6bhkuang#include <stdio.h>
14ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
15ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang#include "./vpx_config.h"
161184aebb761cbeac9124c37189a80a1a58f04b6bhkuang
171184aebb761cbeac9124c37189a80a1a58f04b6bhkuang#include "vpx_mem/vpx_mem.h"
181184aebb761cbeac9124c37189a80a1a58f04b6bhkuang
19ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang#include "vp9/common/vp9_common.h"
20ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
211184aebb761cbeac9124c37189a80a1a58f04b6bhkuang#include "vp9/encoder/vp9_onyx_int.h"
221184aebb761cbeac9124c37189a80a1a58f04b6bhkuang#include "vp9/encoder/vp9_mcomp.h"
231184aebb761cbeac9124c37189a80a1a58f04b6bhkuang
2491037db265ecdd914a26e056cf69207b4f50924ehkuang// #define NEW_DIAMOND_SEARCH
2591037db265ecdd914a26e056cf69207b4f50924ehkuang
266ac915abcdb404a00d927fe6308a47fcf09d9519hkuangstatic INLINE const uint8_t *get_buf_from_mv(const struct buf_2d *buf,
276ac915abcdb404a00d927fe6308a47fcf09d9519hkuang                                             const MV *mv) {
286ac915abcdb404a00d927fe6308a47fcf09d9519hkuang  return &buf->buf[mv->row * buf->stride + mv->col];
296ac915abcdb404a00d927fe6308a47fcf09d9519hkuang}
306ac915abcdb404a00d927fe6308a47fcf09d9519hkuang
31b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanianvoid vp9_set_mv_search_range(MACROBLOCK *x, const MV *mv) {
32b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  int col_min = (mv->col >> 3) - MAX_FULL_PEL_VAL + (mv->col & 7 ? 1 : 0);
33b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  int row_min = (mv->row >> 3) - MAX_FULL_PEL_VAL + (mv->row & 7 ? 1 : 0);
34b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  int col_max = (mv->col >> 3) + MAX_FULL_PEL_VAL;
35b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  int row_max = (mv->row >> 3) + MAX_FULL_PEL_VAL;
36b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian
37b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  col_min = MAX(col_min, (MV_LOW >> 3) + 1);
38b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  row_min = MAX(row_min, (MV_LOW >> 3) + 1);
39b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  col_max = MIN(col_max, (MV_UPP >> 3) - 1);
40b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  row_max = MIN(row_max, (MV_UPP >> 3) - 1);
41ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
421184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  // Get intersection of UMV window and valid MV window to reduce # of checks
431184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  // in diamond search.
44ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  if (x->mv_col_min < col_min)
45ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    x->mv_col_min = col_min;
46ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  if (x->mv_col_max > col_max)
47ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    x->mv_col_max = col_max;
48ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  if (x->mv_row_min < row_min)
49ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    x->mv_row_min = row_min;
50ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  if (x->mv_row_max > row_max)
51ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    x->mv_row_max = row_max;
52ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang}
53ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
5491037db265ecdd914a26e056cf69207b4f50924ehkuangint vp9_init_search_range(VP9_COMP *cpi, int size) {
55ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int sr = 0;
56ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
5791037db265ecdd914a26e056cf69207b4f50924ehkuang  // Minimum search size no matter what the passed in value.
5891037db265ecdd914a26e056cf69207b4f50924ehkuang  size = MAX(16, size);
5991037db265ecdd914a26e056cf69207b4f50924ehkuang
6091037db265ecdd914a26e056cf69207b4f50924ehkuang  while ((size << sr) < MAX_FULL_PEL_VAL)
61ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    sr++;
62ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
6391037db265ecdd914a26e056cf69207b4f50924ehkuang  sr += cpi->sf.reduce_first_step_size;
6491037db265ecdd914a26e056cf69207b4f50924ehkuang  sr = MIN(sr, (cpi->sf.max_step_search_steps - 2));
65ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  return sr;
66ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang}
67ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
685ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuangstatic INLINE int mv_cost(const MV *mv,
695ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang                          const int *joint_cost, int *comp_cost[2]) {
705ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  return joint_cost[vp9_get_mv_joint(mv)] +
715ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang             comp_cost[0][mv->row] + comp_cost[1][mv->col];
72ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang}
73ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
745ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuangint vp9_mv_bit_cost(const MV *mv, const MV *ref,
755ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang                    const int *mvjcost, int *mvcost[2], int weight) {
765ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  const MV diff = { mv->row - ref->row,
775ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang                    mv->col - ref->col };
785ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjcost, mvcost) * weight, 7);
795ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang}
805ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
815ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuangstatic int mv_err_cost(const MV *mv, const MV *ref,
825ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang                       const int *mvjcost, int *mvcost[2],
83f3bed9137f66ef693bd406e43b17e9a1114f1e14hkuang                       int error_per_bit) {
84ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  if (mvcost) {
855ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang    const MV diff = { mv->row - ref->row,
865ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang                      mv->col - ref->col };
875ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang    return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjcost, mvcost) *
885ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang                                  error_per_bit, 13);
89ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  }
90ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  return 0;
91ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang}
92ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
935ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuangstatic int mvsad_err_cost(const MV *mv, const MV *ref,
945ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang                          const int *mvjsadcost, int *mvsadcost[2],
955ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang                          int error_per_bit) {
96ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  if (mvsadcost) {
975ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang    const MV diff = { mv->row - ref->row,
985ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang                      mv->col - ref->col };
995ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang    return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjsadcost, mvsadcost) *
1005ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang                                  error_per_bit, 8);
101ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  }
102ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  return 0;
103ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang}
104ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
105ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuangvoid vp9_init_dsmotion_compensation(MACROBLOCK *x, int stride) {
106b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  int len, ss_count = 1;
107ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
108b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  x->ss[0].mv.col = x->ss[0].mv.row = 0;
109b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  x->ss[0].offset = 0;
110ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
111ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  for (len = MAX_FIRST_STEP; len > 0; len /= 2) {
112b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian    // Generate offsets for 4 search sites per step.
113b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian    const MV ss_mvs[] = {{-len, 0}, {len, 0}, {0, -len}, {0, len}};
114b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian    int i;
115b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian    for (i = 0; i < 4; ++i) {
116b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian      search_site *const ss = &x->ss[ss_count++];
117b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian      ss->mv = ss_mvs[i];
118b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian      ss->offset = ss->mv.row * stride + ss->mv.col;
119b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian    }
120ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  }
121ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
122b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  x->ss_count = ss_count;
123ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  x->searches_per_step = 4;
124ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang}
125ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
126ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuangvoid vp9_init3smotion_compensation(MACROBLOCK *x, int stride) {
1275ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  int len, ss_count = 1;
128ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1295ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  x->ss[0].mv.col = x->ss[0].mv.row = 0;
1305ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  x->ss[0].offset = 0;
131ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
132ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  for (len = MAX_FIRST_STEP; len > 0; len /= 2) {
1335ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang    // Generate offsets for 8 search sites per step.
1345ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang    const MV ss_mvs[8] = {
1355ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang      {-len,  0  }, {len,  0  }, { 0,   -len}, {0,    len},
1365ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang      {-len, -len}, {-len, len}, {len,  -len}, {len,  len}
1375ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang    };
1385ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang    int i;
1395ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang    for (i = 0; i < 8; ++i) {
1405ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang      search_site *const ss = &x->ss[ss_count++];
1415ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang      ss->mv = ss_mvs[i];
1425ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang      ss->offset = ss->mv.row * stride + ss->mv.col;
1435ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang    }
144ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  }
145ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1465ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  x->ss_count = ss_count;
147ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  x->searches_per_step = 8;
148ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang}
149ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
/*
 * To avoid the penalty of reads that cross a cache line, preload the reference
 * area into a small buffer that is aligned so that reads from it never cross a
 * cache line. This reduces the CPU cycles spent on reading reference data in
 * the sub-pixel filter functions.
 * TODO: Currently, since the sub-pixel search range here is -3 ~ 3, copy a
 * 22 rows x 32 cols area, which is enough for a 16x16 macroblock. Later, for
 * SPLITMV, we could reduce the area.
 */
159ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
/*
 * Estimated cost of the motion vector (r, c) relative to the reference
 * (rr, rc). Uses mvjcost, mvcost, rr, rc and error_per_bit from the
 * enclosing scope. The joint index is 2 * (row differs) + (col differs);
 * the summed table cost is scaled by error_per_bit and rounded down by
 * 13 bits. Evaluates to 0 when mvcost is NULL.
 */
#define MVC(r, c)                                              \
  (mvcost                                                      \
       ? ((mvjcost[2 * ((r) != rr) + ((c) != rc)] +            \
           mvcost[0][(r) - rr] + mvcost[1][(c) - rc]) *        \
              error_per_bit +                                  \
          4096) >> 13                                          \
       : 0)
166ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
167ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
168b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian// convert motion vector component to offset for svf calc
169b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanianstatic INLINE int sp(int x) {
170b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  return (x & 7) << 1;
171b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian}
172ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
173b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanianstatic INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c,
174b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                                 int offset) {
175b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  return &buf[(r >> 3) * stride + (c >> 3) - offset];
176b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian}
177ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
/*
 * Sub-pixel variance of the source block against the reference at (r, c),
 * computed by vfp->svf. Uses y, y_stride, offset, z, src_stride and sse from
 * the enclosing scope; the SSE is written to sse.
 */
#define DIST(r, c)                                           \
  vfp->svf(pre(y, y_stride, r, c, offset), y_stride, sp(c),  \
           sp(r), z, src_stride, &sse)
182ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
/*
 * Checks whether (r, c) scores better than the best position found so far.
 *
 * If (r, c) lies inside [minr, maxr] x [minc, maxc], v receives
 * MVC(r, c) + DIST(r, c); when that beats besterr, the macro updates
 * besterr, br, bc, *distortion and *sse1. Otherwise v is set to INT_MAX.
 *
 * All of those names come from the enclosing scope. r and c are evaluated
 * more than once, so they must be free of side effects. The body is wrapped
 * in do { } while (0) so the macro acts as a single statement (safe as the
 * unbraced body of an if/else) and must be followed by a semicolon.
 */
#define CHECK_BETTER(v, r, c)                                          \
  do {                                                                 \
    if ((c) >= minc && (c) <= maxc && (r) >= minr && (r) <= maxr) {    \
      thismse = (DIST((r), (c)));                                      \
      if (((v) = MVC((r), (c)) + thismse) < besterr) {                 \
        besterr = (v);                                                 \
        br = (r);                                                      \
        bc = (c);                                                      \
        *distortion = thismse;                                         \
        *sse1 = sse;                                                   \
      }                                                                \
    } else {                                                           \
      (v) = INT_MAX;                                                   \
    }                                                                  \
  } while (0)
197ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
/*
 * First refinement pass around (tr, tc) at distance hstep: tests the left,
 * right, up and down neighbours, then the single diagonal lying between the
 * better horizontal and the better vertical neighbour. The choice is stored
 * in whichdir: bit 0 is set when left is not strictly better than right,
 * bit 1 when up is not strictly better than down. Relies on CHECK_BETTER
 * and on tr, tc, hstep and whichdir from the enclosing scope.
 */
#define FIRST_LEVEL_CHECKS                                            \
  {                                                                   \
    unsigned int left, right, up, down, diag;                         \
    CHECK_BETTER(left, tr, tc - hstep);                               \
    CHECK_BETTER(right, tr, tc + hstep);                              \
    CHECK_BETTER(up, tr - hstep, tc);                                 \
    CHECK_BETTER(down, tr + hstep, tc);                               \
    whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);          \
    {                                                                 \
      const int diag_r = (whichdir & 2) ? tr + hstep : tr - hstep;    \
      const int diag_c = (whichdir & 1) ? tc + hstep : tc - hstep;    \
      CHECK_BETTER(diag, diag_r, diag_c);                             \
    }                                                                 \
  }
2221184aebb761cbeac9124c37189a80a1a58f04b6bhkuang
/*
 * Second refinement pass. (kr, kc) is how far the best position (br, bc)
 * has moved away from the start point (tr, tc); extra candidates are then
 * tested beyond the best position:
 *   - diagonal move: the two points one further step along each axis;
 *   - horizontal move: both points one row away at twice the column move,
 *     plus one point beside the best position, selected by whichdir;
 *   - vertical move: the mirror image of the horizontal case;
 *   - no move: nothing is tested.
 * NOTE(review): the whichdir-selected point appears to be the one on the
 * side that FIRST_LEVEL_CHECKS did not already visit - confirm.
 * Relies on CHECK_BETTER and on tr, tc, br, bc, hstep and whichdir from the
 * enclosing scope.
 */
#define SECOND_LEVEL_CHECKS                                           \
  {                                                                   \
    const int kr = br - tr;                                           \
    const int kc = bc - tc;                                           \
    unsigned int second;                                              \
    if (kr != 0 && kc != 0) {                                         \
      CHECK_BETTER(second, tr + kr, tc + 2 * kc);                     \
      CHECK_BETTER(second, tr + 2 * kr, tc + kc);                     \
    } else if (kc != 0) {                                             \
      CHECK_BETTER(second, tr + hstep, tc + 2 * kc);                  \
      CHECK_BETTER(second, tr - hstep, tc + 2 * kc);                  \
      if (whichdir <= 1u) {                                           \
        CHECK_BETTER(second, tr + hstep, tc + kc);                    \
      } else if (whichdir <= 3u) {                                    \
        CHECK_BETTER(second, tr - hstep, tc + kc);                    \
      }                                                               \
    } else if (kr != 0) {                                             \
      CHECK_BETTER(second, tr + 2 * kr, tc + hstep);                  \
      CHECK_BETTER(second, tr + 2 * kr, tc - hstep);                  \
      if (whichdir == 0u || whichdir == 2u) {                         \
        CHECK_BETTER(second, tr + kr, tc + hstep);                    \
      } else if (whichdir == 1u || whichdir == 3u) {                  \
        CHECK_BETTER(second, tr + kr, tc - hstep);                    \
      }                                                               \
    }                                                                 \
  }
2621184aebb761cbeac9124c37189a80a1a58f04b6bhkuang
263b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanianint vp9_find_best_sub_pixel_tree(const MACROBLOCK *x,
2645ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang                                 MV *bestmv, const MV *ref_mv,
2655ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang                                 int allow_hp,
266ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                                 int error_per_bit,
267ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                                 const vp9_variance_fn_ptr_t *vfp,
2681184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                                 int forced_stop,
2691184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                                 int iters_per_step,
270ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                                 int *mvjcost, int *mvcost[2],
271ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                                 int *distortion,
2721184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                                 unsigned int *sse1) {
273b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  const uint8_t *z = x->plane[0].src.buf;
2745ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  const int src_stride = x->plane[0].src.stride;
275b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  const MACROBLOCKD *xd = &x->e_mbd;
276ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  unsigned int besterr = INT_MAX;
277ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  unsigned int sse;
278ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  unsigned int whichdir;
279ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int thismse;
2801184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  unsigned int halfiters = iters_per_step;
2811184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  unsigned int quarteriters = iters_per_step;
2821184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  unsigned int eighthiters = iters_per_step;
283ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
2845ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  const int y_stride = xd->plane[0].pre[0].stride;
2855ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  const int offset = bestmv->row * y_stride + bestmv->col;
286b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  const uint8_t *y = xd->plane[0].pre[0].buf + offset;
287ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
2885ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  int rr = ref_mv->row;
2895ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  int rc = ref_mv->col;
2905ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  int br = bestmv->row * 8;
2915ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  int bc = bestmv->col * 8;
2925ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  int hstep = 4;
2935ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  const int minc = MAX(x->mv_col_min * 8, ref_mv->col - MV_MAX);
2945ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  const int maxc = MIN(x->mv_col_max * 8, ref_mv->col + MV_MAX);
2955ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  const int minr = MAX(x->mv_row_min * 8, ref_mv->row - MV_MAX);
2965ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  const int maxr = MIN(x->mv_row_max * 8, ref_mv->row + MV_MAX);
297ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
2985ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  int tr = br;
2995ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  int tc = bc;
300ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
301ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  // central mv
3025ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  bestmv->row *= 8;
3035ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  bestmv->col *= 8;
304ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
305ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  // calculate central point error
3061184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  besterr = vfp->vf(y, y_stride, z, src_stride, sse1);
307ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  *distortion = besterr;
308f3bed9137f66ef693bd406e43b17e9a1114f1e14hkuang  besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
309ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
3101184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  // 1/2 pel
3111184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  FIRST_LEVEL_CHECKS;
3121184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  if (halfiters > 1) {
3131184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    SECOND_LEVEL_CHECKS;
3141184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  }
3151184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  tr = br;
3161184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  tc = bc;
317ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
3181184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
3191184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  if (forced_stop != 2) {
3201184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    hstep >>= 1;
3211184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    FIRST_LEVEL_CHECKS;
3221184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    if (quarteriters > 1) {
3231184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      SECOND_LEVEL_CHECKS;
324ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    }
325ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    tr = br;
326ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    tc = bc;
327ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  }
328ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
3295ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  if (allow_hp && vp9_use_mv_hp(ref_mv) && forced_stop == 0) {
3301184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    hstep >>= 1;
3311184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    FIRST_LEVEL_CHECKS;
3321184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    if (eighthiters > 1) {
3331184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      SECOND_LEVEL_CHECKS;
334ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    }
335ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    tr = br;
336ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    tc = bc;
337ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  }
338b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  // These lines insure static analysis doesn't warn that
339b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  // tr and tc aren't used after the above point.
340b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  (void) tr;
341b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  (void) tc;
342ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
3435ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  bestmv->row = br;
3445ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  bestmv->col = bc;
345ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
3465ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) ||
3475ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang      (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3)))
348ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    return INT_MAX;
349ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
350ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  return besterr;
351ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang}
352ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
#undef DIST
/*
 * Redefinition of DIST for the code that follows: sub-pixel variance of the
 * source block against the reference at (r, c), computed by vfp->svaf with
 * second_pred passed as its extra argument. Uses y, y_stride, offset, z,
 * src_stride, sse and second_pred from the enclosing scope.
 */
#define DIST(r, c)                                            \
  vfp->svaf(pre(y, y_stride, r, c, offset), y_stride, sp(c),  \
            sp(r), z, src_stride, &sse, second_pred)
358ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
359b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanianint vp9_find_best_sub_pixel_comp_tree(const MACROBLOCK *x,
3605ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang                                      MV *bestmv, const MV *ref_mv,
3615ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang                                      int allow_hp,
3621184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                                      int error_per_bit,
3631184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                                      const vp9_variance_fn_ptr_t *vfp,
3641184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                                      int forced_stop,
3651184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                                      int iters_per_step,
3661184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                                      int *mvjcost, int *mvcost[2],
3671184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                                      int *distortion,
3681184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                                      unsigned int *sse1,
3691184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                                      const uint8_t *second_pred,
3701184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                                      int w, int h) {
371b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  const uint8_t *z = x->plane[0].src.buf;
3725ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  const int src_stride = x->plane[0].src.stride;
373b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  const MACROBLOCKD *xd = &x->e_mbd;
3741184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  unsigned int besterr = INT_MAX;
375ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  unsigned int sse;
3761184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  unsigned int whichdir;
377ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int thismse;
3786ac915abcdb404a00d927fe6308a47fcf09d9519hkuang  const unsigned int halfiters = iters_per_step;
3796ac915abcdb404a00d927fe6308a47fcf09d9519hkuang  const unsigned int quarteriters = iters_per_step;
3806ac915abcdb404a00d927fe6308a47fcf09d9519hkuang  const unsigned int eighthiters = iters_per_step;
381ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
3821184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64);
3835ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  const int y_stride = xd->plane[0].pre[0].stride;
3845ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  const int offset = bestmv->row * y_stride + bestmv->col;
385b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  const uint8_t *y = xd->plane[0].pre[0].buf + offset;
3861184aebb761cbeac9124c37189a80a1a58f04b6bhkuang
3875ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  int rr = ref_mv->row;
3885ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  int rc = ref_mv->col;
3895ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  int br = bestmv->row * 8;
3905ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  int bc = bestmv->col * 8;
3915ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  int hstep = 4;
3925ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  const int minc = MAX(x->mv_col_min * 8, ref_mv->col - MV_MAX);
3935ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  const int maxc = MIN(x->mv_col_max * 8, ref_mv->col + MV_MAX);
3945ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  const int minr = MAX(x->mv_row_min * 8, ref_mv->row - MV_MAX);
3955ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  const int maxr = MIN(x->mv_row_max * 8, ref_mv->row + MV_MAX);
3961184aebb761cbeac9124c37189a80a1a58f04b6bhkuang
3975ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  int tr = br;
3985ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  int tc = bc;
3991184aebb761cbeac9124c37189a80a1a58f04b6bhkuang
400ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  // central mv
4015ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  bestmv->row *= 8;
4025ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  bestmv->col *= 8;
403ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
404ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  // calculate central point error
4051184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  // TODO(yunqingwang): central pointer error was already calculated in full-
4061184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  // pixel search, and can be passed in this function.
4076ac915abcdb404a00d927fe6308a47fcf09d9519hkuang  vp9_comp_avg_pred(comp_pred, second_pred, w, h, y, y_stride);
4081184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  besterr = vfp->vf(comp_pred, w, z, src_stride, sse1);
4091184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  *distortion = besterr;
4101184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
411ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
4121184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  // Each subsequent iteration checks at least one point in
4131184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  // common with the last iteration could be 2 ( if diag selected)
4141184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  // 1/2 pel
4151184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  FIRST_LEVEL_CHECKS;
4161184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  if (halfiters > 1) {
4171184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    SECOND_LEVEL_CHECKS;
418ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  }
4191184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  tr = br;
4201184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  tc = bc;
421ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
4221184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  // Each subsequent iteration checks at least one point in common with
4231184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  // the last iteration could be 2 ( if diag selected) 1/4 pel
424ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
4251184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
4261184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  if (forced_stop != 2) {
4271184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    hstep >>= 1;
4281184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    FIRST_LEVEL_CHECKS;
4291184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    if (quarteriters > 1) {
4301184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      SECOND_LEVEL_CHECKS;
4311184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    }
4321184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    tr = br;
4331184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    tc = bc;
434ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  }
435ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
4365ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  if (allow_hp && vp9_use_mv_hp(ref_mv) && forced_stop == 0) {
4371184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    hstep >>= 1;
4381184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    FIRST_LEVEL_CHECKS;
4391184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    if (eighthiters > 1) {
4401184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      SECOND_LEVEL_CHECKS;
4411184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    }
4421184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    tr = br;
4431184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    tc = bc;
444ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  }
445b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  // These lines insure static analysis doesn't warn that
446b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  // tr and tc aren't used after the above point.
447b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  (void) tr;
448b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  (void) tc;
449b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian
4505ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  bestmv->row = br;
4515ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  bestmv->col = bc;
452ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
4535ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) ||
4545ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang      (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3)))
4551184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    return INT_MAX;
456ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
4571184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  return besterr;
458ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang}
459ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
// Drop the helper macro names used by the sub-pixel search code above so that
// the rest of the file can reuse them (CHECK_BETTER is redefined further
// down for the pattern search).
#undef CHECK_BETTER
#undef DIST
#undef MVC
#undef PRE
4641184aebb761cbeac9124c37189a80a1a58f04b6bhkuang
465b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanianstatic INLINE int check_bounds(const MACROBLOCK *x, int row, int col,
466b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                               int range) {
467b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  return ((row - range) >= x->mv_row_min) &
468b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian         ((row + range) <= x->mv_row_max) &
469b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian         ((col - range) >= x->mv_col_min) &
470b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian         ((col + range) <= x->mv_col_max);
471b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian}
472ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
473b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanianstatic INLINE int is_mv_in(const MACROBLOCK *x, const MV *mv) {
474b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  return (mv->col >= x->mv_col_min) && (mv->col <= x->mv_col_max) &&
475b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian         (mv->row >= x->mv_row_min) && (mv->row <= x->mv_row_max);
476b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian}
477ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
// CHECK_BETTER: accept candidate `i` if it beats the best SAD so far.
// Expands to a braced block (call sites do not add a semicolon) and relies on
// these names being in scope: thissad, bestsad, best_site, i, this_mv,
// fcenter_mv, use_mvcost, mvjsadcost, mvsadcost and sad_per_bit.
// The raw SAD is compared first; only if it is already lower is the MV cost
// added (when use_mvcost is set) and the comparison repeated.
#define CHECK_BETTER                                                  \
  {                                                                   \
    if (thissad < bestsad) {                                          \
      if (use_mvcost) {                                               \
        thissad += mvsad_err_cost(&this_mv, &fcenter_mv,              \
                                  mvjsadcost, mvsadcost, sad_per_bit);\
      }                                                               \
      if (thissad < bestsad) {                                        \
        bestsad = thissad;                                            \
        best_site = i;                                                \
      }                                                               \
    }                                                                 \
  }
490ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
// Dimensions of the candidate tables taken by vp9_pattern_search().
#define MAX_PATTERN_SCALES 11      // number of scales in a pattern table
#define MAX_PATTERN_CANDIDATES 8   // max number of candidates per scale
#define PATTERN_CANDIDATES_REF 3   // number of refinement candidates
4941184aebb761cbeac9124c37189a80a1a58f04b6bhkuang
4951184aebb761cbeac9124c37189a80a1a58f04b6bhkuang// Generic pattern search function that searches over multiple scales.
4961184aebb761cbeac9124c37189a80a1a58f04b6bhkuang// Each scale can have a different number of candidates and shape of
4971184aebb761cbeac9124c37189a80a1a58f04b6bhkuang// candidates as indicated in the num_candidates and candidates arrays
4981184aebb761cbeac9124c37189a80a1a58f04b6bhkuang// passed into this function
499b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanianstatic int vp9_pattern_search(const MACROBLOCK *x,
5005ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang                              MV *ref_mv,
5011184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                              int search_param,
5021184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                              int sad_per_bit,
5036ac915abcdb404a00d927fe6308a47fcf09d9519hkuang                              int do_init_search, int do_refine,
5041184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                              const vp9_variance_fn_ptr_t *vfp,
5051184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                              int use_mvcost,
5065ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang                              const MV *center_mv, MV *best_mv,
5071184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                              const int num_candidates[MAX_PATTERN_SCALES],
5081184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                              const MV candidates[MAX_PATTERN_SCALES]
5091184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                                                 [MAX_PATTERN_CANDIDATES]) {
510b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  const MACROBLOCKD *const xd = &x->e_mbd;
5111184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  static const int search_param_to_steps[MAX_MVSEARCH_STEPS] = {
5121184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
5131184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  };
5141184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  int i, j, s, t;
5156ac915abcdb404a00d927fe6308a47fcf09d9519hkuang  const struct buf_2d *const what = &x->plane[0].src;
5166ac915abcdb404a00d927fe6308a47fcf09d9519hkuang  const struct buf_2d *const in_what = &xd->plane[0].pre[0];
517ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int br, bc;
5181184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  int bestsad = INT_MAX;
5191184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  int thissad;
520ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int k = -1;
521b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
5221184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  int best_init_s = search_param_to_steps[search_param];
5236ac915abcdb404a00d927fe6308a47fcf09d9519hkuang  const int *const mvjsadcost = x->nmvjointsadcost;
5241184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
5251184aebb761cbeac9124c37189a80a1a58f04b6bhkuang
526ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  // adjust ref_mv to make sure it is within MV range
5275ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
5285ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  br = ref_mv->row;
5295ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  bc = ref_mv->col;
530ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
531ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  // Work out the start point for the search
5326ac915abcdb404a00d927fe6308a47fcf09d9519hkuang  bestsad = vfp->sdf(what->buf, what->stride,
5336ac915abcdb404a00d927fe6308a47fcf09d9519hkuang                     get_buf_from_mv(in_what, ref_mv), in_what->stride,
5346ac915abcdb404a00d927fe6308a47fcf09d9519hkuang                     0x7fffffff) + mvsad_err_cost(ref_mv, &fcenter_mv,
5356ac915abcdb404a00d927fe6308a47fcf09d9519hkuang                         mvjsadcost, mvsadcost, sad_per_bit);
536ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
5371184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  // Search all possible scales upto the search param around the center point
5381184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  // pick the scale of the point that is best as the starting scale of
5391184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  // further steps around it.
5401184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  if (do_init_search) {
5411184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    s = best_init_s;
5421184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    best_init_s = -1;
5431184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    for (t = 0; t <= s; ++t) {
5446ac915abcdb404a00d927fe6308a47fcf09d9519hkuang      int best_site = -1;
545b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian      if (check_bounds(x, br, bc, 1 << t)) {
5461184aebb761cbeac9124c37189a80a1a58f04b6bhkuang        for (i = 0; i < num_candidates[t]; i++) {
5476ac915abcdb404a00d927fe6308a47fcf09d9519hkuang          const MV this_mv = {br + candidates[t][i].row,
5486ac915abcdb404a00d927fe6308a47fcf09d9519hkuang                              bc + candidates[t][i].col};
5496ac915abcdb404a00d927fe6308a47fcf09d9519hkuang          thissad = vfp->sdf(what->buf, what->stride,
5506ac915abcdb404a00d927fe6308a47fcf09d9519hkuang                             get_buf_from_mv(in_what, &this_mv),
5516ac915abcdb404a00d927fe6308a47fcf09d9519hkuang                             in_what->stride, bestsad);
5521184aebb761cbeac9124c37189a80a1a58f04b6bhkuang          CHECK_BETTER
5531184aebb761cbeac9124c37189a80a1a58f04b6bhkuang        }
5541184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      } else {
5551184aebb761cbeac9124c37189a80a1a58f04b6bhkuang        for (i = 0; i < num_candidates[t]; i++) {
5566ac915abcdb404a00d927fe6308a47fcf09d9519hkuang          const MV this_mv = {br + candidates[t][i].row,
5576ac915abcdb404a00d927fe6308a47fcf09d9519hkuang                              bc + candidates[t][i].col};
558b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian          if (!is_mv_in(x, &this_mv))
559b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian            continue;
5606ac915abcdb404a00d927fe6308a47fcf09d9519hkuang          thissad = vfp->sdf(what->buf, what->stride,
5616ac915abcdb404a00d927fe6308a47fcf09d9519hkuang                             get_buf_from_mv(in_what, &this_mv),
5626ac915abcdb404a00d927fe6308a47fcf09d9519hkuang                             in_what->stride, bestsad);
5631184aebb761cbeac9124c37189a80a1a58f04b6bhkuang          CHECK_BETTER
5641184aebb761cbeac9124c37189a80a1a58f04b6bhkuang        }
5651184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      }
5661184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      if (best_site == -1) {
5671184aebb761cbeac9124c37189a80a1a58f04b6bhkuang        continue;
5681184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      } else {
5691184aebb761cbeac9124c37189a80a1a58f04b6bhkuang        best_init_s = t;
5701184aebb761cbeac9124c37189a80a1a58f04b6bhkuang        k = best_site;
5711184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      }
572ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    }
5731184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    if (best_init_s != -1) {
5741184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      br += candidates[best_init_s][k].row;
5751184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      bc += candidates[best_init_s][k].col;
576ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    }
577ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  }
578ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
5791184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  // If the center point is still the best, just skip this and move to
5801184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  // the refinement step.
5811184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  if (best_init_s != -1) {
5826ac915abcdb404a00d927fe6308a47fcf09d9519hkuang    int best_site = -1;
5831184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    s = best_init_s;
5846ac915abcdb404a00d927fe6308a47fcf09d9519hkuang
5851184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    do {
5861184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      // No need to search all 6 points the 1st time if initial search was used
5871184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      if (!do_init_search || s != best_init_s) {
588b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian        if (check_bounds(x, br, bc, 1 << s)) {
5891184aebb761cbeac9124c37189a80a1a58f04b6bhkuang          for (i = 0; i < num_candidates[s]; i++) {
5906ac915abcdb404a00d927fe6308a47fcf09d9519hkuang            const MV this_mv = {br + candidates[s][i].row,
5916ac915abcdb404a00d927fe6308a47fcf09d9519hkuang                                bc + candidates[s][i].col};
5926ac915abcdb404a00d927fe6308a47fcf09d9519hkuang            thissad = vfp->sdf(what->buf, what->stride,
5936ac915abcdb404a00d927fe6308a47fcf09d9519hkuang                               get_buf_from_mv(in_what, &this_mv),
5946ac915abcdb404a00d927fe6308a47fcf09d9519hkuang                               in_what->stride, bestsad);
5951184aebb761cbeac9124c37189a80a1a58f04b6bhkuang            CHECK_BETTER
5961184aebb761cbeac9124c37189a80a1a58f04b6bhkuang          }
5971184aebb761cbeac9124c37189a80a1a58f04b6bhkuang        } else {
5981184aebb761cbeac9124c37189a80a1a58f04b6bhkuang          for (i = 0; i < num_candidates[s]; i++) {
5996ac915abcdb404a00d927fe6308a47fcf09d9519hkuang            const MV this_mv = {br + candidates[s][i].row,
6006ac915abcdb404a00d927fe6308a47fcf09d9519hkuang                                bc + candidates[s][i].col};
601b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian            if (!is_mv_in(x, &this_mv))
602b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian              continue;
6036ac915abcdb404a00d927fe6308a47fcf09d9519hkuang            thissad = vfp->sdf(what->buf, what->stride,
6046ac915abcdb404a00d927fe6308a47fcf09d9519hkuang                               get_buf_from_mv(in_what, &this_mv),
6056ac915abcdb404a00d927fe6308a47fcf09d9519hkuang                               in_what->stride, bestsad);
6061184aebb761cbeac9124c37189a80a1a58f04b6bhkuang            CHECK_BETTER
6071184aebb761cbeac9124c37189a80a1a58f04b6bhkuang          }
6081184aebb761cbeac9124c37189a80a1a58f04b6bhkuang        }
609ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
6101184aebb761cbeac9124c37189a80a1a58f04b6bhkuang        if (best_site == -1) {
6111184aebb761cbeac9124c37189a80a1a58f04b6bhkuang          continue;
6121184aebb761cbeac9124c37189a80a1a58f04b6bhkuang        } else {
6131184aebb761cbeac9124c37189a80a1a58f04b6bhkuang          br += candidates[s][best_site].row;
6141184aebb761cbeac9124c37189a80a1a58f04b6bhkuang          bc += candidates[s][best_site].col;
6151184aebb761cbeac9124c37189a80a1a58f04b6bhkuang          k = best_site;
6161184aebb761cbeac9124c37189a80a1a58f04b6bhkuang        }
617ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      }
618ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
6191184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      do {
6201184aebb761cbeac9124c37189a80a1a58f04b6bhkuang        int next_chkpts_indices[PATTERN_CANDIDATES_REF];
6211184aebb761cbeac9124c37189a80a1a58f04b6bhkuang        best_site = -1;
622b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian        next_chkpts_indices[0] = (k == 0) ? num_candidates[s] - 1 : k - 1;
623b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian        next_chkpts_indices[1] = k;
624b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian        next_chkpts_indices[2] = (k == num_candidates[s] - 1) ? 0 : k + 1;
6251184aebb761cbeac9124c37189a80a1a58f04b6bhkuang
626b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian        if (check_bounds(x, br, bc, 1 << s)) {
6271184aebb761cbeac9124c37189a80a1a58f04b6bhkuang          for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
6286ac915abcdb404a00d927fe6308a47fcf09d9519hkuang            const MV this_mv = {br + candidates[s][next_chkpts_indices[i]].row,
6296ac915abcdb404a00d927fe6308a47fcf09d9519hkuang                                bc + candidates[s][next_chkpts_indices[i]].col};
6306ac915abcdb404a00d927fe6308a47fcf09d9519hkuang            thissad = vfp->sdf(what->buf, what->stride,
6316ac915abcdb404a00d927fe6308a47fcf09d9519hkuang                               get_buf_from_mv(in_what, &this_mv),
6326ac915abcdb404a00d927fe6308a47fcf09d9519hkuang                               in_what->stride, bestsad);
6331184aebb761cbeac9124c37189a80a1a58f04b6bhkuang            CHECK_BETTER
6341184aebb761cbeac9124c37189a80a1a58f04b6bhkuang          }
6351184aebb761cbeac9124c37189a80a1a58f04b6bhkuang        } else {
6361184aebb761cbeac9124c37189a80a1a58f04b6bhkuang          for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
6376ac915abcdb404a00d927fe6308a47fcf09d9519hkuang            const MV this_mv = {br + candidates[s][next_chkpts_indices[i]].row,
6386ac915abcdb404a00d927fe6308a47fcf09d9519hkuang                                bc + candidates[s][next_chkpts_indices[i]].col};
639b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian            if (!is_mv_in(x, &this_mv))
640b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian              continue;
6416ac915abcdb404a00d927fe6308a47fcf09d9519hkuang            thissad = vfp->sdf(what->buf, what->stride,
6426ac915abcdb404a00d927fe6308a47fcf09d9519hkuang                               get_buf_from_mv(in_what, &this_mv),
6436ac915abcdb404a00d927fe6308a47fcf09d9519hkuang                               in_what->stride, bestsad);
6441184aebb761cbeac9124c37189a80a1a58f04b6bhkuang            CHECK_BETTER
6451184aebb761cbeac9124c37189a80a1a58f04b6bhkuang          }
6461184aebb761cbeac9124c37189a80a1a58f04b6bhkuang        }
6471184aebb761cbeac9124c37189a80a1a58f04b6bhkuang
6481184aebb761cbeac9124c37189a80a1a58f04b6bhkuang        if (best_site != -1) {
6491184aebb761cbeac9124c37189a80a1a58f04b6bhkuang          k = next_chkpts_indices[best_site];
6501184aebb761cbeac9124c37189a80a1a58f04b6bhkuang          br += candidates[s][k].row;
6511184aebb761cbeac9124c37189a80a1a58f04b6bhkuang          bc += candidates[s][k].col;
6521184aebb761cbeac9124c37189a80a1a58f04b6bhkuang        }
6531184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      } while (best_site != -1);
6541184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    } while (s--);
655ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  }
656ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
6571184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  // Check 4 1-away neighbors if do_refine is true.
6581184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  // For most well-designed schemes do_refine will not be necessary.
6591184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  if (do_refine) {
6606ac915abcdb404a00d927fe6308a47fcf09d9519hkuang    static const MV neighbors[4] = {{0, -1}, { -1, 0}, {1, 0}, {0, 1}};
6616ac915abcdb404a00d927fe6308a47fcf09d9519hkuang
6621184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    for (j = 0; j < 16; j++) {
6636ac915abcdb404a00d927fe6308a47fcf09d9519hkuang      int best_site = -1;
664b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian      if (check_bounds(x, br, bc, 1)) {
6651184aebb761cbeac9124c37189a80a1a58f04b6bhkuang        for (i = 0; i < 4; i++) {
6666ac915abcdb404a00d927fe6308a47fcf09d9519hkuang          const MV this_mv = {br + neighbors[i].row,
6676ac915abcdb404a00d927fe6308a47fcf09d9519hkuang                              bc + neighbors[i].col};
6686ac915abcdb404a00d927fe6308a47fcf09d9519hkuang          thissad = vfp->sdf(what->buf, what->stride,
6696ac915abcdb404a00d927fe6308a47fcf09d9519hkuang                             get_buf_from_mv(in_what, &this_mv),
6706ac915abcdb404a00d927fe6308a47fcf09d9519hkuang                             in_what->stride, bestsad);
6711184aebb761cbeac9124c37189a80a1a58f04b6bhkuang          CHECK_BETTER
6721184aebb761cbeac9124c37189a80a1a58f04b6bhkuang        }
6731184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      } else {
6741184aebb761cbeac9124c37189a80a1a58f04b6bhkuang        for (i = 0; i < 4; i++) {
6756ac915abcdb404a00d927fe6308a47fcf09d9519hkuang          const MV this_mv = {br + neighbors[i].row,
6766ac915abcdb404a00d927fe6308a47fcf09d9519hkuang                              bc + neighbors[i].col};
677b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian          if (!is_mv_in(x, &this_mv))
678b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian            continue;
6796ac915abcdb404a00d927fe6308a47fcf09d9519hkuang          thissad = vfp->sdf(what->buf, what->stride,
6806ac915abcdb404a00d927fe6308a47fcf09d9519hkuang                             get_buf_from_mv(in_what, &this_mv),
6816ac915abcdb404a00d927fe6308a47fcf09d9519hkuang                             in_what->stride, bestsad);
6821184aebb761cbeac9124c37189a80a1a58f04b6bhkuang          CHECK_BETTER
6831184aebb761cbeac9124c37189a80a1a58f04b6bhkuang        }
684b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian      }
685ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
6861184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      if (best_site == -1) {
6871184aebb761cbeac9124c37189a80a1a58f04b6bhkuang        break;
6881184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      } else {
6891184aebb761cbeac9124c37189a80a1a58f04b6bhkuang        br += neighbors[best_site].row;
6901184aebb761cbeac9124c37189a80a1a58f04b6bhkuang        bc += neighbors[best_site].col;
691ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      }
692ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    }
693ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  }
694ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
6955ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  best_mv->row = br;
6965ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  best_mv->col = bc;
697ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
698b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  return bestsad;
699b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian}
7005ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang
701b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanianint vp9_get_mvpred_var(const MACROBLOCK *x,
702b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                       const MV *best_mv, const MV *center_mv,
703b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                       const vp9_variance_fn_ptr_t *vfp,
704b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                       int use_mvcost) {
705b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  const MACROBLOCKD *const xd = &x->e_mbd;
7066ac915abcdb404a00d927fe6308a47fcf09d9519hkuang  const struct buf_2d *const what = &x->plane[0].src;
7076ac915abcdb404a00d927fe6308a47fcf09d9519hkuang  const struct buf_2d *const in_what = &xd->plane[0].pre[0];
708b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  const MV mv = {best_mv->row * 8, best_mv->col * 8};
7096ac915abcdb404a00d927fe6308a47fcf09d9519hkuang  unsigned int unused;
7106ac915abcdb404a00d927fe6308a47fcf09d9519hkuang
7116ac915abcdb404a00d927fe6308a47fcf09d9519hkuang  return vfp->vf(what->buf, what->stride,
7126ac915abcdb404a00d927fe6308a47fcf09d9519hkuang                 get_buf_from_mv(in_what, best_mv), in_what->stride, &unused) +
713b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian      (use_mvcost ?  mv_err_cost(&mv, center_mv, x->nmvjointcost,
714b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                                 x->mvcost, x->errorperbit) : 0);
7151184aebb761cbeac9124c37189a80a1a58f04b6bhkuang}
7161184aebb761cbeac9124c37189a80a1a58f04b6bhkuang
717b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanianint vp9_get_mvpred_av_var(const MACROBLOCK *x,
7186ac915abcdb404a00d927fe6308a47fcf09d9519hkuang                          const MV *best_mv, const MV *center_mv,
719b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                          const uint8_t *second_pred,
720b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                          const vp9_variance_fn_ptr_t *vfp,
721b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                          int use_mvcost) {
722b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  const MACROBLOCKD *const xd = &x->e_mbd;
7236ac915abcdb404a00d927fe6308a47fcf09d9519hkuang  const struct buf_2d *const what = &x->plane[0].src;
7246ac915abcdb404a00d927fe6308a47fcf09d9519hkuang  const struct buf_2d *const in_what = &xd->plane[0].pre[0];
7256ac915abcdb404a00d927fe6308a47fcf09d9519hkuang  const MV mv = {best_mv->row * 8, best_mv->col * 8};
7266ac915abcdb404a00d927fe6308a47fcf09d9519hkuang  unsigned int unused;
7276ac915abcdb404a00d927fe6308a47fcf09d9519hkuang
7286ac915abcdb404a00d927fe6308a47fcf09d9519hkuang  return vfp->svaf(get_buf_from_mv(in_what, best_mv), in_what->stride, 0, 0,
7296ac915abcdb404a00d927fe6308a47fcf09d9519hkuang                   what->buf, what->stride, &unused, second_pred) +
7306ac915abcdb404a00d927fe6308a47fcf09d9519hkuang      (use_mvcost ?  mv_err_cost(&mv, center_mv, x->nmvjointcost,
731b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                                 x->mvcost, x->errorperbit) : 0);
732b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian}
7331184aebb761cbeac9124c37189a80a1a58f04b6bhkuang
734b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanianint vp9_hex_search(const MACROBLOCK *x,
7355ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang                   MV *ref_mv,
7361184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                   int search_param,
7371184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                   int sad_per_bit,
7381184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                   int do_init_search,
7391184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                   const vp9_variance_fn_ptr_t *vfp,
7401184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                   int use_mvcost,
7415ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang                   const MV *center_mv, MV *best_mv) {
7421184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  // First scale has 8-closest points, the rest have 6 points in hex shape
7431184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  // at increasing scales
7441184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  static const int hex_num_candidates[MAX_PATTERN_SCALES] = {
7451184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    8, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6
7461184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  };
7471184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  // Note that the largest candidate step at each scale is 2^scale
7481184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  static const MV hex_candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES] = {
7491184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    {{-1, -1}, {0, -1}, {1, -1}, {1, 0}, {1, 1}, { 0, 1}, { -1, 1}, {-1, 0}},
7501184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    {{-1, -2}, {1, -2}, {2, 0}, {1, 2}, { -1, 2}, { -2, 0}},
7511184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    {{-2, -4}, {2, -4}, {4, 0}, {2, 4}, { -2, 4}, { -4, 0}},
7521184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    {{-4, -8}, {4, -8}, {8, 0}, {4, 8}, { -4, 8}, { -8, 0}},
7531184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    {{-8, -16}, {8, -16}, {16, 0}, {8, 16}, { -8, 16}, { -16, 0}},
7541184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    {{-16, -32}, {16, -32}, {32, 0}, {16, 32}, { -16, 32}, { -32, 0}},
7551184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    {{-32, -64}, {32, -64}, {64, 0}, {32, 64}, { -32, 64}, { -64, 0}},
7561184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    {{-64, -128}, {64, -128}, {128, 0}, {64, 128}, { -64, 128}, { -128, 0}},
7571184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    {{-128, -256}, {128, -256}, {256, 0}, {128, 256}, { -128, 256}, { -256, 0}},
7581184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    {{-256, -512}, {256, -512}, {512, 0}, {256, 512}, { -256, 512}, { -512, 0}},
7591184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    {{-512, -1024}, {512, -1024}, {1024, 0}, {512, 1024}, { -512, 1024},
7601184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      { -1024, 0}},
7611184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  };
762b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  return vp9_pattern_search(x, ref_mv, search_param, sad_per_bit,
763b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                            do_init_search, 0, vfp, use_mvcost,
764b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                            center_mv, best_mv,
765b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                            hex_num_candidates, hex_candidates);
7661184aebb761cbeac9124c37189a80a1a58f04b6bhkuang}
7671184aebb761cbeac9124c37189a80a1a58f04b6bhkuang
768b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanianint vp9_bigdia_search(const MACROBLOCK *x,
7695ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang                      MV *ref_mv,
7701184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                      int search_param,
7711184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                      int sad_per_bit,
7721184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                      int do_init_search,
7731184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                      const vp9_variance_fn_ptr_t *vfp,
7741184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                      int use_mvcost,
7755ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang                      const MV *center_mv,
7765ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang                      MV *best_mv) {
7771184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  // First scale has 4-closest points, the rest have 8 points in diamond
7781184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  // shape at increasing scales
7791184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  static const int bigdia_num_candidates[MAX_PATTERN_SCALES] = {
7801184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
7811184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  };
7821184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  // Note that the largest candidate step at each scale is 2^scale
7831184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  static const MV bigdia_candidates[MAX_PATTERN_SCALES]
7841184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                                   [MAX_PATTERN_CANDIDATES] = {
7851184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    {{0, -1}, {1, 0}, { 0, 1}, {-1, 0}},
7861184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    {{-1, -1}, {0, -2}, {1, -1}, {2, 0}, {1, 1}, {0, 2}, {-1, 1}, {-2, 0}},
7871184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    {{-2, -2}, {0, -4}, {2, -2}, {4, 0}, {2, 2}, {0, 4}, {-2, 2}, {-4, 0}},
7881184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    {{-4, -4}, {0, -8}, {4, -4}, {8, 0}, {4, 4}, {0, 8}, {-4, 4}, {-8, 0}},
7891184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    {{-8, -8}, {0, -16}, {8, -8}, {16, 0}, {8, 8}, {0, 16}, {-8, 8}, {-16, 0}},
7901184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    {{-16, -16}, {0, -32}, {16, -16}, {32, 0}, {16, 16}, {0, 32},
7911184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      {-16, 16}, {-32, 0}},
7921184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    {{-32, -32}, {0, -64}, {32, -32}, {64, 0}, {32, 32}, {0, 64},
7931184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      {-32, 32}, {-64, 0}},
7941184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    {{-64, -64}, {0, -128}, {64, -64}, {128, 0}, {64, 64}, {0, 128},
7951184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      {-64, 64}, {-128, 0}},
7961184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    {{-128, -128}, {0, -256}, {128, -128}, {256, 0}, {128, 128}, {0, 256},
7971184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      {-128, 128}, {-256, 0}},
7981184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    {{-256, -256}, {0, -512}, {256, -256}, {512, 0}, {256, 256}, {0, 512},
7991184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      {-256, 256}, {-512, 0}},
8001184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    {{-512, -512}, {0, -1024}, {512, -512}, {1024, 0}, {512, 512}, {0, 1024},
8011184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      {-512, 512}, {-1024, 0}},
8021184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  };
8035ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  return vp9_pattern_search(x, ref_mv, search_param, sad_per_bit,
8045ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang                            do_init_search, 0, vfp, use_mvcost,
8055ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang                            center_mv, best_mv,
8065ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang                            bigdia_num_candidates, bigdia_candidates);
807ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang}
8081184aebb761cbeac9124c37189a80a1a58f04b6bhkuang
809b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanianint vp9_square_search(const MACROBLOCK *x,
8105ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang                      MV *ref_mv,
8111184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                      int search_param,
8121184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                      int sad_per_bit,
8131184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                      int do_init_search,
8141184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                      const vp9_variance_fn_ptr_t *vfp,
8151184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                      int use_mvcost,
8165ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang                      const MV *center_mv,
8175ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang                      MV *best_mv) {
8181184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  // All scales have 8 closest points in square shape
8191184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  static const int square_num_candidates[MAX_PATTERN_SCALES] = {
8201184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8211184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  };
8221184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  // Note that the largest candidate step at each scale is 2^scale
8231184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  static const MV square_candidates[MAX_PATTERN_SCALES]
8241184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                                   [MAX_PATTERN_CANDIDATES] = {
8251184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    {{-1, -1}, {0, -1}, {1, -1}, {1, 0}, {1, 1}, {0, 1}, {-1, 1}, {-1, 0}},
8261184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    {{-2, -2}, {0, -2}, {2, -2}, {2, 0}, {2, 2}, {0, 2}, {-2, 2}, {-2, 0}},
8271184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    {{-4, -4}, {0, -4}, {4, -4}, {4, 0}, {4, 4}, {0, 4}, {-4, 4}, {-4, 0}},
8281184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    {{-8, -8}, {0, -8}, {8, -8}, {8, 0}, {8, 8}, {0, 8}, {-8, 8}, {-8, 0}},
8291184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    {{-16, -16}, {0, -16}, {16, -16}, {16, 0}, {16, 16}, {0, 16},
8301184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      {-16, 16}, {-16, 0}},
8311184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    {{-32, -32}, {0, -32}, {32, -32}, {32, 0}, {32, 32}, {0, 32},
8321184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      {-32, 32}, {-32, 0}},
8331184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    {{-64, -64}, {0, -64}, {64, -64}, {64, 0}, {64, 64}, {0, 64},
8341184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      {-64, 64}, {-64, 0}},
8351184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    {{-128, -128}, {0, -128}, {128, -128}, {128, 0}, {128, 128}, {0, 128},
8361184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      {-128, 128}, {-128, 0}},
8371184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    {{-256, -256}, {0, -256}, {256, -256}, {256, 0}, {256, 256}, {0, 256},
8381184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      {-256, 256}, {-256, 0}},
8391184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    {{-512, -512}, {0, -512}, {512, -512}, {512, 0}, {512, 512}, {0, 512},
8401184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      {-512, 512}, {-512, 0}},
8411184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    {{-1024, -1024}, {0, -1024}, {1024, -1024}, {1024, 0}, {1024, 1024},
8421184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      {0, 1024}, {-1024, 1024}, {-1024, 0}},
8431184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  };
8445ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  return vp9_pattern_search(x, ref_mv, search_param, sad_per_bit,
8455ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang                            do_init_search, 0, vfp, use_mvcost,
8465ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang                            center_mv, best_mv,
8475ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang                            square_num_candidates, square_candidates);
848a72801d7d92ababb50eecf27a36bd222d031d2feVignesh Venkatasubramanian}
8491184aebb761cbeac9124c37189a80a1a58f04b6bhkuang
850b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanianint vp9_fast_hex_search(const MACROBLOCK *x,
851b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                        MV *ref_mv,
852b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                        int search_param,
853b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                        int sad_per_bit,
854b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                        int do_init_search,  // must be zero for fast_hex
855b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                        const vp9_variance_fn_ptr_t *vfp,
856b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                        int use_mvcost,
857b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                        const MV *center_mv,
858b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                        MV *best_mv) {
859b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  return vp9_hex_search(x, ref_mv, MAX(MAX_MVSEARCH_STEPS - 2, search_param),
860b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                        sad_per_bit, do_init_search, vfp, use_mvcost,
861b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                        center_mv, best_mv);
862b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian}
863b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian
864b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanianint vp9_fast_dia_search(const MACROBLOCK *x,
865b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                        MV *ref_mv,
866b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                        int search_param,
867b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                        int sad_per_bit,
868b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                        int do_init_search,
869b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                        const vp9_variance_fn_ptr_t *vfp,
870b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                        int use_mvcost,
871b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                        const MV *center_mv,
872b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                        MV *best_mv) {
873b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  return vp9_bigdia_search(x, ref_mv, MAX(MAX_MVSEARCH_STEPS - 2, search_param),
874b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                           sad_per_bit, do_init_search, vfp, use_mvcost,
875b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                           center_mv, best_mv);
876b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian}
877b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian
878ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang#undef CHECK_BETTER
879ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
880b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanianint vp9_full_range_search_c(const MACROBLOCK *x, MV *ref_mv, MV *best_mv,
881b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                            int search_param, int sad_per_bit, int *num00,
882b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                            const vp9_variance_fn_ptr_t *fn_ptr,
883b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                            int *mvjcost, int *mvcost[2],
884b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                            const MV *center_mv) {
885b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  const MACROBLOCKD *const xd = &x->e_mbd;
886b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  const uint8_t *what = x->plane[0].src.buf;
887b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  const int what_stride = x->plane[0].src.stride;
888b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  const uint8_t *in_what;
889b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  const int in_what_stride = xd->plane[0].pre[0].stride;
890b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian
891b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  unsigned int bestsad = INT_MAX;
892b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  int ref_row, ref_col;
893b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian
894b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  unsigned int thissad;
895b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
896b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian
897b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  const int *mvjsadcost = x->nmvjointsadcost;
898b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
899b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian
900b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  int tr, tc;
901b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  int best_tr = 0;
902b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  int best_tc = 0;
903b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  int range = 64;
904b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian
905b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  int start_col, end_col;
906b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  int start_row, end_row;
907b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  int i;
908b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian
909b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
910b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  ref_row = ref_mv->row;
911b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  ref_col = ref_mv->col;
912b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  *num00 = 11;
913b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  best_mv->row = ref_row;
914b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  best_mv->col = ref_col;
915b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian
916b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  // Work out the start point for the search
917b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  in_what = xd->plane[0].pre[0].buf + ref_row * in_what_stride + ref_col;
918b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian
919b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  // Check the starting position
920b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff)
921b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                + mvsad_err_cost(best_mv, &fcenter_mv,
922b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                                 mvjsadcost, mvsadcost, sad_per_bit);
923b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian
924b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  start_row = MAX(-range, x->mv_row_min - ref_row);
925b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  start_col = MAX(-range, x->mv_col_min - ref_col);
926b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  end_row = MIN(range, x->mv_row_max - ref_row);
927b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  end_col = MIN(range, x->mv_col_max - ref_col);
928b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian
929b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  for (tr = start_row; tr <= end_row; ++tr) {
930b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian    for (tc = start_col; tc <= end_col; tc += 4) {
931b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian      if ((tc + 3) <= end_col) {
932b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian        unsigned int sad_array[4];
933b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian        unsigned char const *addr_ref[4];
934b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian        for (i = 0; i < 4; ++i)
935b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian          addr_ref[i] = in_what + tr * in_what_stride + tc + i;
936b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian
937b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian        fn_ptr->sdx4df(what, what_stride, addr_ref, in_what_stride, sad_array);
938b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian
939b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian        for (i = 0; i < 4; ++i) {
940b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian          if (sad_array[i] < bestsad) {
9416ac915abcdb404a00d927fe6308a47fcf09d9519hkuang            const MV this_mv = {ref_row + tr, ref_col + tc + i};
942b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian            thissad = sad_array[i] +
943b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                      mvsad_err_cost(&this_mv, &fcenter_mv,
944b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                                      mvjsadcost, mvsadcost, sad_per_bit);
945b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian            if (thissad < bestsad) {
946b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian              bestsad = thissad;
947b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian              best_tr = tr;
948b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian              best_tc = tc + i;
949b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian            }
950b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian          }
951b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian        }
952b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian      } else {
953b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian        for (i = 0; i < end_col - tc; ++i) {
954b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian          const uint8_t *check_here = in_what + tr * in_what_stride + tc + i;
955b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian          thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride,
956b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                                bestsad);
957b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian
958b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian          if (thissad < bestsad) {
9596ac915abcdb404a00d927fe6308a47fcf09d9519hkuang            const MV this_mv = {ref_row + tr, ref_col + tc + i};
960b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian            thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
961b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                                      mvjsadcost, mvsadcost, sad_per_bit);
962b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian
963b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian            if (thissad < bestsad) {
964b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian              bestsad = thissad;
965b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian              best_tr = tr;
966b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian              best_tc = tc + i;
967b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian            }
968b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian          }
969b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian        }
970b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian      }
971b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian    }
972b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  }
973b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  best_mv->row += best_tr;
974b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  best_mv->col += best_tc;
975b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  return bestsad;
976b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian}
977b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian
978b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanianint vp9_diamond_search_sad_c(const MACROBLOCK *x,
979b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                             MV *ref_mv, MV *best_mv,
980ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                             int search_param, int sad_per_bit, int *num00,
981b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                             const vp9_variance_fn_ptr_t *fn_ptr,
982b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                             int *mvjcost, int *mvcost[2],
983b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                             const MV *center_mv) {
984b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  const MACROBLOCKD *const xd = &x->e_mbd;
9856ac915abcdb404a00d927fe6308a47fcf09d9519hkuang  const struct buf_2d *const what = &x->plane[0].src;
9866ac915abcdb404a00d927fe6308a47fcf09d9519hkuang  const struct buf_2d *const in_what = &xd->plane[0].pre[0];
987b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  // search_param determines the length of the initial step and hence the number
988b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  // of iterations
989b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  // 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 =
990b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  // (MAX_FIRST_STEP/4) pel... etc.
991b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  const search_site *const ss = &x->ss[search_param * x->searches_per_step];
992b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  const int tot_steps = (x->ss_count / x->searches_per_step) - search_param;
993b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
994b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  const int *mvjsadcost = x->nmvjointsadcost;
995ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
9966ac915abcdb404a00d927fe6308a47fcf09d9519hkuang  const uint8_t *best_address;
9976ac915abcdb404a00d927fe6308a47fcf09d9519hkuang  int best_sad = INT_MAX;
9986ac915abcdb404a00d927fe6308a47fcf09d9519hkuang  int best_site = 0;
9996ac915abcdb404a00d927fe6308a47fcf09d9519hkuang  int last_site = 0;
10006ac915abcdb404a00d927fe6308a47fcf09d9519hkuang  int i, j, step;
1001ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1002b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
10036ac915abcdb404a00d927fe6308a47fcf09d9519hkuang  best_address = get_buf_from_mv(in_what, ref_mv);
1004ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  *num00 = 0;
10056ac915abcdb404a00d927fe6308a47fcf09d9519hkuang  *best_mv = *ref_mv;
1006ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1007ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  // Check the starting position
10086ac915abcdb404a00d927fe6308a47fcf09d9519hkuang  best_sad = fn_ptr->sdf(what->buf, what->stride,
10096ac915abcdb404a00d927fe6308a47fcf09d9519hkuang                        in_what->buf, in_what->stride, 0x7fffffff) +
10106ac915abcdb404a00d927fe6308a47fcf09d9519hkuang      mvsad_err_cost(best_mv, &fcenter_mv, mvjsadcost, mvsadcost, sad_per_bit);
1011ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1012ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  i = 1;
1013ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1014ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  for (step = 0; step < tot_steps; step++) {
1015ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    for (j = 0; j < x->searches_per_step; j++) {
10166ac915abcdb404a00d927fe6308a47fcf09d9519hkuang      const MV mv = {best_mv->row + ss[i].mv.row,
10176ac915abcdb404a00d927fe6308a47fcf09d9519hkuang                     best_mv->col + ss[i].mv.col};
10186ac915abcdb404a00d927fe6308a47fcf09d9519hkuang      if (is_mv_in(x, &mv)) {
10196ac915abcdb404a00d927fe6308a47fcf09d9519hkuang       int sad = fn_ptr->sdf(what->buf, what->stride,
10206ac915abcdb404a00d927fe6308a47fcf09d9519hkuang                             best_address + ss[i].offset, in_what->stride,
10216ac915abcdb404a00d927fe6308a47fcf09d9519hkuang                             best_sad);
10226ac915abcdb404a00d927fe6308a47fcf09d9519hkuang        if (sad < best_sad) {
10236ac915abcdb404a00d927fe6308a47fcf09d9519hkuang          sad += mvsad_err_cost(&mv, &fcenter_mv, mvjsadcost, mvsadcost,
10246ac915abcdb404a00d927fe6308a47fcf09d9519hkuang                                sad_per_bit);
10256ac915abcdb404a00d927fe6308a47fcf09d9519hkuang          if (sad < best_sad) {
10266ac915abcdb404a00d927fe6308a47fcf09d9519hkuang            best_sad = sad;
1027ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            best_site = i;
1028ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang          }
1029ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        }
1030ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      }
1031ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1032ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      i++;
1033ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    }
1034ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1035ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    if (best_site != last_site) {
1036b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian      best_mv->row += ss[best_site].mv.row;
1037b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian      best_mv->col += ss[best_site].mv.col;
1038ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      best_address += ss[best_site].offset;
1039ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      last_site = best_site;
104091037db265ecdd914a26e056cf69207b4f50924ehkuang#if defined(NEW_DIAMOND_SEARCH)
104191037db265ecdd914a26e056cf69207b4f50924ehkuang      while (1) {
1042a72801d7d92ababb50eecf27a36bd222d031d2feVignesh Venkatasubramanian        const MV this_mv = {best_mv->row + ss[best_site].mv.row,
1043a72801d7d92ababb50eecf27a36bd222d031d2feVignesh Venkatasubramanian                            best_mv->col + ss[best_site].mv.col};
1044a72801d7d92ababb50eecf27a36bd222d031d2feVignesh Venkatasubramanian        if (is_mv_in(x, &this_mv)) {
10456ac915abcdb404a00d927fe6308a47fcf09d9519hkuang          int sad = fn_ptr->sdf(what->buf, what->stride,
10466ac915abcdb404a00d927fe6308a47fcf09d9519hkuang                                best_address + ss[best_site].offset,
10476ac915abcdb404a00d927fe6308a47fcf09d9519hkuang                                in_what->stride, best_sad);
10486ac915abcdb404a00d927fe6308a47fcf09d9519hkuang          if (sad < best_sad) {
10496ac915abcdb404a00d927fe6308a47fcf09d9519hkuang            sad += mvsad_err_cost(&this_mv, &fcenter_mv,
10506ac915abcdb404a00d927fe6308a47fcf09d9519hkuang                                  mvjsadcost, mvsadcost, sad_per_bit);
10516ac915abcdb404a00d927fe6308a47fcf09d9519hkuang            if (sad < best_sad) {
10526ac915abcdb404a00d927fe6308a47fcf09d9519hkuang              best_sad = sad;
1053b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian              best_mv->row += ss[best_site].mv.row;
1054b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian              best_mv->col += ss[best_site].mv.col;
105591037db265ecdd914a26e056cf69207b4f50924ehkuang              best_address += ss[best_site].offset;
105691037db265ecdd914a26e056cf69207b4f50924ehkuang              continue;
105791037db265ecdd914a26e056cf69207b4f50924ehkuang            }
105891037db265ecdd914a26e056cf69207b4f50924ehkuang          }
105991037db265ecdd914a26e056cf69207b4f50924ehkuang        }
106091037db265ecdd914a26e056cf69207b4f50924ehkuang        break;
106191037db265ecdd914a26e056cf69207b4f50924ehkuang      };
106291037db265ecdd914a26e056cf69207b4f50924ehkuang#endif
10636ac915abcdb404a00d927fe6308a47fcf09d9519hkuang    } else if (best_address == in_what->buf) {
1064ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      (*num00)++;
10655ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang    }
1066ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  }
10676ac915abcdb404a00d927fe6308a47fcf09d9519hkuang  return best_sad;
1068ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang}
1069ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1070b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanianint vp9_diamond_search_sadx4(const MACROBLOCK *x,
1071b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                             MV *ref_mv, MV *best_mv, int search_param,
1072ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                             int sad_per_bit, int *num00,
1073b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                             const vp9_variance_fn_ptr_t *fn_ptr,
1074b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                             int *mvjcost, int *mvcost[2],
1075b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                             const MV *center_mv) {
1076ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int i, j, step;
1077ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1078b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  const MACROBLOCKD *const xd = &x->e_mbd;
1079ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  uint8_t *what = x->plane[0].src.buf;
1080b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  const int what_stride = x->plane[0].src.stride;
1081b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  const uint8_t *in_what;
1082b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  const int in_what_stride = xd->plane[0].pre[0].stride;
1083b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  const uint8_t *best_address;
1084ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1085ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  unsigned int bestsad = INT_MAX;
1086ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int best_site = 0;
1087ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int last_site = 0;
1088ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1089ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int ref_row;
1090ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int ref_col;
1091ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1092b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  // search_param determines the length of the initial step and hence the number
1093b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  // of iterations.
1094b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  // 0 = initial step (MAX_FIRST_STEP) pel
1095b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  // 1 = (MAX_FIRST_STEP/2) pel,
1096b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  // 2 = (MAX_FIRST_STEP/4) pel...
1097b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  const search_site *ss = &x->ss[search_param * x->searches_per_step];
1098b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  const int tot_steps = (x->ss_count / x->searches_per_step) - search_param;
1099b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian
1100b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
1101ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1102b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  const int *mvjsadcost = x->nmvjointsadcost;
1103ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
1104ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1105b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
1106b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  ref_row = ref_mv->row;
1107b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  ref_col = ref_mv->col;
1108ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  *num00 = 0;
1109b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  best_mv->row = ref_row;
1110b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  best_mv->col = ref_col;
1111ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1112ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  // Work out the start point for the search
1113b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  in_what = xd->plane[0].pre[0].buf + ref_row * in_what_stride + ref_col;
1114ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  best_address = in_what;
1115ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1116ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  // Check the starting position
11175ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang  bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff)
1118b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                + mvsad_err_cost(best_mv, &fcenter_mv,
11195ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang                                 mvjsadcost, mvsadcost, sad_per_bit);
1120ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1121ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  i = 1;
1122ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1123ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  for (step = 0; step < tot_steps; step++) {
1124ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    int all_in = 1, t;
1125ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
11265ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang    // All_in is true if every one of the points we are checking are within
11275ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang    // the bounds of the image.
1128b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian    all_in &= ((best_mv->row + ss[i].mv.row) > x->mv_row_min);
1129b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian    all_in &= ((best_mv->row + ss[i + 1].mv.row) < x->mv_row_max);
1130b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian    all_in &= ((best_mv->col + ss[i + 2].mv.col) > x->mv_col_min);
1131b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian    all_in &= ((best_mv->col + ss[i + 3].mv.col) < x->mv_col_max);
1132ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
11335ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang    // If all the pixels are within the bounds we don't check whether the
11345ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang    // search point is valid in this loop,  otherwise we check each point
11355ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang    // for validity..
1136ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    if (all_in) {
1137ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      unsigned int sad_array[4];
1138ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1139ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      for (j = 0; j < x->searches_per_step; j += 4) {
1140ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        unsigned char const *block_offset[4];
1141ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1142ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        for (t = 0; t < 4; t++)
1143ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang          block_offset[t] = ss[i + t].offset + best_address;
1144ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1145ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride,
1146ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                       sad_array);
1147ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1148ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        for (t = 0; t < 4; t++, i++) {
1149ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang          if (sad_array[t] < bestsad) {
1150a72801d7d92ababb50eecf27a36bd222d031d2feVignesh Venkatasubramanian            const MV this_mv = {best_mv->row + ss[i].mv.row,
1151a72801d7d92ababb50eecf27a36bd222d031d2feVignesh Venkatasubramanian                                best_mv->col + ss[i].mv.col};
1152b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian            sad_array[t] += mvsad_err_cost(&this_mv, &fcenter_mv,
1153ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                                           mvjsadcost, mvsadcost, sad_per_bit);
1154ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1155ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            if (sad_array[t] < bestsad) {
1156ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang              bestsad = sad_array[t];
1157ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang              best_site = i;
1158ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            }
1159ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang          }
1160ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        }
1161ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      }
1162ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    } else {
1163ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      for (j = 0; j < x->searches_per_step; j++) {
1164ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        // Trap illegal vectors
1165a72801d7d92ababb50eecf27a36bd222d031d2feVignesh Venkatasubramanian        const MV this_mv = {best_mv->row + ss[i].mv.row,
1166a72801d7d92ababb50eecf27a36bd222d031d2feVignesh Venkatasubramanian                            best_mv->col + ss[i].mv.col};
1167ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1168a72801d7d92ababb50eecf27a36bd222d031d2feVignesh Venkatasubramanian        if (is_mv_in(x, &this_mv)) {
1169b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian          const uint8_t *const check_here = ss[i].offset + best_address;
1170a72801d7d92ababb50eecf27a36bd222d031d2feVignesh Venkatasubramanian          unsigned int thissad = fn_ptr->sdf(what, what_stride, check_here,
1171a72801d7d92ababb50eecf27a36bd222d031d2feVignesh Venkatasubramanian                                             in_what_stride, bestsad);
1172ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1173ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang          if (thissad < bestsad) {
1174b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian            thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
1175ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                                      mvjsadcost, mvsadcost, sad_per_bit);
1176ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1177ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            if (thissad < bestsad) {
1178ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang              bestsad = thissad;
1179ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang              best_site = i;
1180ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            }
1181ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang          }
1182ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        }
1183ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        i++;
1184ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      }
1185ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    }
1186ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    if (best_site != last_site) {
1187b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian      best_mv->row += ss[best_site].mv.row;
1188b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian      best_mv->col += ss[best_site].mv.col;
1189ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      best_address += ss[best_site].offset;
1190ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      last_site = best_site;
119191037db265ecdd914a26e056cf69207b4f50924ehkuang#if defined(NEW_DIAMOND_SEARCH)
119291037db265ecdd914a26e056cf69207b4f50924ehkuang      while (1) {
1193a72801d7d92ababb50eecf27a36bd222d031d2feVignesh Venkatasubramanian        const MV this_mv = {best_mv->row + ss[best_site].mv.row,
1194a72801d7d92ababb50eecf27a36bd222d031d2feVignesh Venkatasubramanian                            best_mv->col + ss[best_site].mv.col};
1195a72801d7d92ababb50eecf27a36bd222d031d2feVignesh Venkatasubramanian        if (is_mv_in(x, &this_mv)) {
1196a72801d7d92ababb50eecf27a36bd222d031d2feVignesh Venkatasubramanian          const uint8_t *const check_here = ss[best_site].offset + best_address;
1197a72801d7d92ababb50eecf27a36bd222d031d2feVignesh Venkatasubramanian          unsigned int thissad = fn_ptr->sdf(what, what_stride, check_here,
1198a72801d7d92ababb50eecf27a36bd222d031d2feVignesh Venkatasubramanian                                             in_what_stride, bestsad);
119991037db265ecdd914a26e056cf69207b4f50924ehkuang          if (thissad < bestsad) {
1200b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian            thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
120191037db265ecdd914a26e056cf69207b4f50924ehkuang                                      mvjsadcost, mvsadcost, sad_per_bit);
120291037db265ecdd914a26e056cf69207b4f50924ehkuang            if (thissad < bestsad) {
120391037db265ecdd914a26e056cf69207b4f50924ehkuang              bestsad = thissad;
1204b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian              best_mv->row += ss[best_site].mv.row;
1205b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian              best_mv->col += ss[best_site].mv.col;
120691037db265ecdd914a26e056cf69207b4f50924ehkuang              best_address += ss[best_site].offset;
120791037db265ecdd914a26e056cf69207b4f50924ehkuang              continue;
120891037db265ecdd914a26e056cf69207b4f50924ehkuang            }
120991037db265ecdd914a26e056cf69207b4f50924ehkuang          }
121091037db265ecdd914a26e056cf69207b4f50924ehkuang        }
121191037db265ecdd914a26e056cf69207b4f50924ehkuang        break;
121291037db265ecdd914a26e056cf69207b4f50924ehkuang      };
121391037db265ecdd914a26e056cf69207b4f50924ehkuang#endif
12145ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang    } else if (best_address == in_what) {
1215ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      (*num00)++;
12165ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang    }
1217ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  }
1218b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  return bestsad;
1219ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang}
1220ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1221ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang/* do_refine: If last step (1-away) of n-step search doesn't pick the center
1222ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang              point as the best match, we will do a final 1-away diamond
1223ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang              refining search  */
122491037db265ecdd914a26e056cf69207b4f50924ehkuang
1225b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanianint vp9_full_pixel_diamond(const VP9_COMP *cpi, MACROBLOCK *x,
1226b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                           MV *mvp_full, int step_param,
1227b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                           int sadpb, int further_steps, int do_refine,
1228b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                           const vp9_variance_fn_ptr_t *fn_ptr,
1229b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                           const MV *ref_mv, MV *dst_mv) {
1230b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  MV temp_mv;
1231b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  int thissme, n, num00 = 0;
1232ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int bestsme = cpi->diamond_search_sad(x, mvp_full, &temp_mv,
1233b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                                        step_param, sadpb, &n,
1234ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                                        fn_ptr, x->nmvjointcost,
1235ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                                        x->mvcost, ref_mv);
1236b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  if (bestsme < INT_MAX)
1237b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian    bestsme = vp9_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1);
1238b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  *dst_mv = temp_mv;
1239ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1240b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  // If there won't be more n-step search, check to see if refining search is
1241b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  // needed.
1242ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  if (n > further_steps)
1243ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    do_refine = 0;
1244ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1245ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  while (n < further_steps) {
1246b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian    ++n;
1247ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
12485ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang    if (num00) {
1249ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      num00--;
12505ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang    } else {
1251ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      thissme = cpi->diamond_search_sad(x, mvp_full, &temp_mv,
1252ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                                        step_param + n, sadpb, &num00,
1253ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                                        fn_ptr, x->nmvjointcost, x->mvcost,
1254ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                                        ref_mv);
1255b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian      if (thissme < INT_MAX)
1256b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian        thissme = vp9_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1);
1257ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1258b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian      // check to see if refining search is needed.
1259b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian      if (num00 > further_steps - n)
1260ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        do_refine = 0;
1261ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1262ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      if (thissme < bestsme) {
1263ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        bestsme = thissme;
1264b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian        *dst_mv = temp_mv;
1265ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      }
1266ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    }
1267ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  }
1268ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1269b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  // final 1-away diamond refining search
1270b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  if (do_refine) {
1271b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian    const int search_range = 8;
1272b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian    MV best_mv = *dst_mv;
1273ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    thissme = cpi->refining_search_sad(x, &best_mv, sadpb, search_range,
1274ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                                       fn_ptr, x->nmvjointcost, x->mvcost,
1275ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                                       ref_mv);
1276b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian    if (thissme < INT_MAX)
1277b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian      thissme = vp9_get_mvpred_var(x, &best_mv, ref_mv, fn_ptr, 1);
1278ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    if (thissme < bestsme) {
1279ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      bestsme = thissme;
1280b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian      *dst_mv = best_mv;
1281ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    }
1282ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  }
1283ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  return bestsme;
1284ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang}
1285ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1286b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanianint vp9_full_search_sad_c(const MACROBLOCK *x, const MV *ref_mv,
1287ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                          int sad_per_bit, int distance,
1288b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                          const vp9_variance_fn_ptr_t *fn_ptr,
1289b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                          int *mvjcost, int *mvcost[2],
1290b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                          const MV *center_mv, MV *best_mv) {
1291ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int r, c;
1292b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  const MACROBLOCKD *const xd = &x->e_mbd;
12936ac915abcdb404a00d927fe6308a47fcf09d9519hkuang  const struct buf_2d *const what = &x->plane[0].src;
12946ac915abcdb404a00d927fe6308a47fcf09d9519hkuang  const struct buf_2d *const in_what = &xd->plane[0].pre[0];
1295b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  const int row_min = MAX(ref_mv->row - distance, x->mv_row_min);
1296b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  const int row_max = MIN(ref_mv->row + distance, x->mv_row_max);
1297b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  const int col_min = MAX(ref_mv->col - distance, x->mv_col_min);
1298b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  const int col_max = MIN(ref_mv->col + distance, x->mv_col_max);
1299b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  const int *mvjsadcost = x->nmvjointsadcost;
1300ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
1301b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
13026ac915abcdb404a00d927fe6308a47fcf09d9519hkuang  int best_sad = fn_ptr->sdf(what->buf, what->stride,
13036ac915abcdb404a00d927fe6308a47fcf09d9519hkuang      get_buf_from_mv(in_what, ref_mv), in_what->stride, 0x7fffffff) +
1304b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian      mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, sad_per_bit);
1305b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  *best_mv = *ref_mv;
1306b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian
1307b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  for (r = row_min; r < row_max; ++r) {
1308b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian    for (c = col_min; c < col_max; ++c) {
13096ac915abcdb404a00d927fe6308a47fcf09d9519hkuang      const MV mv = {r, c};
13106ac915abcdb404a00d927fe6308a47fcf09d9519hkuang      const int sad = fn_ptr->sdf(what->buf, what->stride,
13116ac915abcdb404a00d927fe6308a47fcf09d9519hkuang          get_buf_from_mv(in_what, &mv), in_what->stride, best_sad) +
13126ac915abcdb404a00d927fe6308a47fcf09d9519hkuang          mvsad_err_cost(&mv, &fcenter_mv, mvjsadcost, mvsadcost,
13136ac915abcdb404a00d927fe6308a47fcf09d9519hkuang                         sad_per_bit);
1314b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian
1315b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian      if (sad < best_sad) {
1316b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian        best_sad = sad;
13176ac915abcdb404a00d927fe6308a47fcf09d9519hkuang        *best_mv = mv;
1318ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      }
1319ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    }
1320ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  }
1321b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  return best_sad;
1322ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang}
1323ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1324b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanianint vp9_full_search_sadx3(const MACROBLOCK *x, const MV *ref_mv,
1325ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                          int sad_per_bit, int distance,
1326b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                          const vp9_variance_fn_ptr_t *fn_ptr,
1327b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                          int *mvjcost, int *mvcost[2],
1328b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                          const MV *center_mv, MV *best_mv) {
1329b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  const MACROBLOCKD *const xd = &x->e_mbd;
1330b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  const uint8_t *const what = x->plane[0].src.buf;
1331b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  const int what_stride = x->plane[0].src.stride;
1332b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  const uint8_t *const in_what = xd->plane[0].pre[0].buf;
1333b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  const int in_what_stride = xd->plane[0].pre[0].stride;
1334b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  MV this_mv;
1335ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  unsigned int bestsad = INT_MAX;
1336ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int r, c;
1337ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  unsigned int thissad;
1338b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  int ref_row = ref_mv->row;
1339b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  int ref_col = ref_mv->col;
1340ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1341b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  // Apply further limits to prevent us looking using vectors that stretch
1342b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  // beyond the UMV border
1343b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  const int row_min = MAX(ref_row - distance, x->mv_row_min);
1344b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  const int row_max = MIN(ref_row + distance, x->mv_row_max);
1345b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  const int col_min = MAX(ref_col - distance, x->mv_col_min);
1346b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  const int col_max = MIN(ref_col + distance, x->mv_col_max);
1347ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  unsigned int sad_array[3];
1348b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
1349b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  const int *mvjsadcost = x->nmvjointsadcost;
1350ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
1351ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1352ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  // Work out the mid point for the search
1353b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  const uint8_t *bestaddress = &in_what[ref_row * in_what_stride + ref_col];
1354ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1355b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  best_mv->row = ref_row;
1356b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  best_mv->col = ref_col;
1357ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1358ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  // Baseline value at the centre
1359ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  bestsad = fn_ptr->sdf(what, what_stride,
1360ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                        bestaddress, in_what_stride, 0x7fffffff)
1361b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian            + mvsad_err_cost(best_mv, &fcenter_mv,
13625ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang                             mvjsadcost, mvsadcost, sad_per_bit);
1363ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1364ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  for (r = row_min; r < row_max; r++) {
1365b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian    const uint8_t *check_here = &in_what[r * in_what_stride + col_min];
1366b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian    this_mv.row = r;
1367ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    c = col_min;
1368ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1369b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian    while ((c + 2) < col_max && fn_ptr->sdx3f != NULL) {
1370ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      int i;
1371ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1372ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array);
1373ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1374ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      for (i = 0; i < 3; i++) {
1375ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        thissad = sad_array[i];
1376ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1377ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        if (thissad < bestsad) {
1378b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian          this_mv.col = c;
1379b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian          thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
13805ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang                                    mvjsadcost, mvsadcost, sad_per_bit);
1381ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1382ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang          if (thissad < bestsad) {
1383ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            bestsad = thissad;
1384b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian            best_mv->row = r;
1385b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian            best_mv->col = c;
1386ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang          }
1387ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        }
1388ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        check_here++;
1389ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        c++;
1390ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      }
1391ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    }
1392ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1393ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    while (c < col_max) {
13945ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang      thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride,
13955ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang                            bestsad);
1396ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1397ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      if (thissad < bestsad) {
1398b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian        this_mv.col = c;
1399b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian        thissad  += mvsad_err_cost(&this_mv, &fcenter_mv,
1400ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                                   mvjsadcost, mvsadcost, sad_per_bit);
1401ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1402ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        if (thissad < bestsad) {
1403ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang          bestsad = thissad;
1404b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian          best_mv->row = r;
1405b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian          best_mv->col = c;
1406ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        }
1407ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      }
1408ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1409ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      check_here++;
1410ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      c++;
1411ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    }
1412ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  }
1413b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  return bestsad;
1414ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang}
1415ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1416b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanianint vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv,
1417ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                          int sad_per_bit, int distance,
1418b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                          const vp9_variance_fn_ptr_t *fn_ptr,
1419ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                          int *mvjcost, int *mvcost[2],
1420b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                          const MV *center_mv, MV *best_mv) {
1421b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  const MACROBLOCKD *const xd = &x->e_mbd;
1422b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  const uint8_t *const what = x->plane[0].src.buf;
1423b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  const int what_stride = x->plane[0].src.stride;
1424b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  const uint8_t *const in_what = xd->plane[0].pre[0].buf;
1425b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  const int in_what_stride = xd->plane[0].pre[0].stride;
1426b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  MV this_mv;
1427ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  unsigned int bestsad = INT_MAX;
1428ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int r, c;
1429b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  int ref_row = ref_mv->row;
1430b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  int ref_col = ref_mv->col;
1431ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1432b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  // Apply further limits to prevent us looking using vectors that stretch
1433b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  // beyond the UMV border
1434b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  const int row_min = MAX(ref_row - distance, x->mv_row_min);
1435b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  const int row_max = MIN(ref_row + distance, x->mv_row_max);
1436b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  const int col_min = MAX(ref_col - distance, x->mv_col_min);
1437b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  const int col_max = MIN(ref_col + distance, x->mv_col_max);
1438ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  DECLARE_ALIGNED_ARRAY(16, uint32_t, sad_array8, 8);
1439ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  unsigned int sad_array[3];
1440b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
1441ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1442b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  const int *mvjsadcost = x->nmvjointsadcost;
1443ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
1444ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1445ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  // Work out the mid point for the search
1446b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  const uint8_t *bestaddress = &in_what[ref_row * in_what_stride + ref_col];
1447ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1448b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  best_mv->row = ref_row;
1449b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  best_mv->col = ref_col;
1450ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1451b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  // Baseline value at the center
1452ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  bestsad = fn_ptr->sdf(what, what_stride,
1453ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                        bestaddress, in_what_stride, 0x7fffffff)
1454b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian            + mvsad_err_cost(best_mv, &fcenter_mv,
14555ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang                             mvjsadcost, mvsadcost, sad_per_bit);
1456ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1457ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  for (r = row_min; r < row_max; r++) {
1458b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian    const uint8_t *check_here = &in_what[r * in_what_stride + col_min];
1459b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian    this_mv.row = r;
1460ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    c = col_min;
1461ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1462ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    while ((c + 7) < col_max) {
1463ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      int i;
1464ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1465ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      fn_ptr->sdx8f(what, what_stride, check_here, in_what_stride, sad_array8);
1466ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1467ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      for (i = 0; i < 8; i++) {
14686ac915abcdb404a00d927fe6308a47fcf09d9519hkuang        unsigned int thissad = (unsigned int)sad_array8[i];
1469ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1470ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        if (thissad < bestsad) {
1471b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian          this_mv.col = c;
1472b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian          thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
14735ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang                                    mvjsadcost, mvsadcost, sad_per_bit);
1474ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1475ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang          if (thissad < bestsad) {
1476ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            bestsad = thissad;
1477b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian            best_mv->row = r;
1478b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian            best_mv->col = c;
1479ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang          }
1480ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        }
1481ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1482ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        check_here++;
1483ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        c++;
1484ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      }
1485ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    }
1486ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
14871184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    while ((c + 2) < col_max && fn_ptr->sdx3f != NULL) {
1488ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      int i;
1489ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1490ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array);
1491ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1492ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      for (i = 0; i < 3; i++) {
14936ac915abcdb404a00d927fe6308a47fcf09d9519hkuang        unsigned int thissad = sad_array[i];
1494ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1495ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        if (thissad < bestsad) {
1496b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian          this_mv.col = c;
14976ac915abcdb404a00d927fe6308a47fcf09d9519hkuang          thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
14986ac915abcdb404a00d927fe6308a47fcf09d9519hkuang                                    mvjsadcost, mvsadcost, sad_per_bit);
1499ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1500ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang          if (thissad < bestsad) {
1501ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            bestsad = thissad;
1502b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian            best_mv->row = r;
1503b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian            best_mv->col = c;
1504ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang          }
1505ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        }
1506ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1507ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        check_here++;
1508ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        c++;
1509ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      }
1510ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    }
1511ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1512ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    while (c < col_max) {
15136ac915abcdb404a00d927fe6308a47fcf09d9519hkuang      unsigned int thissad = fn_ptr->sdf(what, what_stride,
15146ac915abcdb404a00d927fe6308a47fcf09d9519hkuang                                         check_here, in_what_stride, bestsad);
1515ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1516ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      if (thissad < bestsad) {
1517b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian        this_mv.col = c;
1518b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian        thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
15195ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang                                  mvjsadcost, mvsadcost, sad_per_bit);
1520ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1521ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        if (thissad < bestsad) {
1522ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang          bestsad = thissad;
1523b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian          best_mv->row = r;
1524b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian          best_mv->col = c;
1525ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        }
1526ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      }
1527ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1528ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      check_here++;
1529ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      c++;
1530ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    }
1531ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  }
1532b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  return bestsad;
1533ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang}
1534ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1535b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanianint vp9_refining_search_sad_c(const MACROBLOCK *x,
1536b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                              MV *ref_mv, int error_per_bit,
1537b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                              int search_range,
1538b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                              const vp9_variance_fn_ptr_t *fn_ptr,
1539b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                              int *mvjcost, int *mvcost[2],
1540b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                              const MV *center_mv) {
1541b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  const MV neighbors[4] = {{ -1, 0}, {0, -1}, {0, 1}, {1, 0}};
15426ac915abcdb404a00d927fe6308a47fcf09d9519hkuang  const MACROBLOCKD *const xd = &x->e_mbd;
15436ac915abcdb404a00d927fe6308a47fcf09d9519hkuang  const struct buf_2d *const what = &x->plane[0].src;
15446ac915abcdb404a00d927fe6308a47fcf09d9519hkuang  const struct buf_2d *const in_what = &xd->plane[0].pre[0];
1545b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
1546b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  const int *mvjsadcost = x->nmvjointsadcost;
1547ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
1548ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
15496ac915abcdb404a00d927fe6308a47fcf09d9519hkuang  unsigned int best_sad = fn_ptr->sdf(what->buf, what->stride,
15506ac915abcdb404a00d927fe6308a47fcf09d9519hkuang                                     get_buf_from_mv(in_what, ref_mv),
15516ac915abcdb404a00d927fe6308a47fcf09d9519hkuang                                     in_what->stride, 0x7fffffff) +
1552b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian      mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit);
15536ac915abcdb404a00d927fe6308a47fcf09d9519hkuang  int i, j;
1554ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1555ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  for (i = 0; i < search_range; i++) {
1556ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    int best_site = -1;
1557ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1558ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    for (j = 0; j < 4; j++) {
15596ac915abcdb404a00d927fe6308a47fcf09d9519hkuang      const MV mv = {ref_mv->row + neighbors[j].row,
15606ac915abcdb404a00d927fe6308a47fcf09d9519hkuang                     ref_mv->col + neighbors[j].col};
15616ac915abcdb404a00d927fe6308a47fcf09d9519hkuang      if (is_mv_in(x, &mv)) {
15626ac915abcdb404a00d927fe6308a47fcf09d9519hkuang        unsigned int sad = fn_ptr->sdf(what->buf, what->stride,
15636ac915abcdb404a00d927fe6308a47fcf09d9519hkuang            get_buf_from_mv(in_what, &mv), in_what->stride, best_sad);
15646ac915abcdb404a00d927fe6308a47fcf09d9519hkuang        if (sad < best_sad) {
15656ac915abcdb404a00d927fe6308a47fcf09d9519hkuang          sad += mvsad_err_cost(&mv, &fcenter_mv, mvjsadcost, mvsadcost,
15666ac915abcdb404a00d927fe6308a47fcf09d9519hkuang                                error_per_bit);
15676ac915abcdb404a00d927fe6308a47fcf09d9519hkuang          if (sad < best_sad) {
15686ac915abcdb404a00d927fe6308a47fcf09d9519hkuang            best_sad = sad;
1569ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            best_site = j;
1570ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang          }
1571ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        }
1572ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      }
1573ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    }
1574ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
15755ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang    if (best_site == -1) {
1576ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      break;
15775ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang    } else {
1578b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian      ref_mv->row += neighbors[best_site].row;
1579b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian      ref_mv->col += neighbors[best_site].col;
1580ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    }
1581ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  }
15826ac915abcdb404a00d927fe6308a47fcf09d9519hkuang  return best_sad;
1583ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang}
1584ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1585b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanianint vp9_refining_search_sadx4(const MACROBLOCK *x,
1586b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                              MV *ref_mv, int error_per_bit,
1587b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                              int search_range,
1588b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                              const vp9_variance_fn_ptr_t *fn_ptr,
1589b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                              int *mvjcost, int *mvcost[2],
1590b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                              const MV *center_mv) {
1591b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  const MACROBLOCKD *const xd = &x->e_mbd;
15926ac915abcdb404a00d927fe6308a47fcf09d9519hkuang  const MV neighbors[4] = {{ -1, 0}, {0, -1}, {0, 1}, {1, 0}};
15936ac915abcdb404a00d927fe6308a47fcf09d9519hkuang  const struct buf_2d *const what = &x->plane[0].src;
15946ac915abcdb404a00d927fe6308a47fcf09d9519hkuang  const struct buf_2d *const in_what = &xd->plane[0].pre[0];
1595b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
1596b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  const int *mvjsadcost = x->nmvjointsadcost;
1597b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
15986ac915abcdb404a00d927fe6308a47fcf09d9519hkuang  const uint8_t *best_address = get_buf_from_mv(in_what, ref_mv);
15996ac915abcdb404a00d927fe6308a47fcf09d9519hkuang  unsigned int best_sad = fn_ptr->sdf(what->buf, what->stride, best_address,
16006ac915abcdb404a00d927fe6308a47fcf09d9519hkuang                                    in_what->stride, 0x7fffffff) +
1601b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian      mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit);
16026ac915abcdb404a00d927fe6308a47fcf09d9519hkuang  int i, j;
1603ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1604ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  for (i = 0; i < search_range; i++) {
1605ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    int best_site = -1;
16066ac915abcdb404a00d927fe6308a47fcf09d9519hkuang    const int all_in = ((ref_mv->row - 1) > x->mv_row_min) &
16076ac915abcdb404a00d927fe6308a47fcf09d9519hkuang                       ((ref_mv->row + 1) < x->mv_row_max) &
16086ac915abcdb404a00d927fe6308a47fcf09d9519hkuang                       ((ref_mv->col - 1) > x->mv_col_min) &
16096ac915abcdb404a00d927fe6308a47fcf09d9519hkuang                       ((ref_mv->col + 1) < x->mv_col_max);
1610ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1611ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    if (all_in) {
16126ac915abcdb404a00d927fe6308a47fcf09d9519hkuang      unsigned int sads[4];
16136ac915abcdb404a00d927fe6308a47fcf09d9519hkuang      const uint8_t *const positions[4] = {
16146ac915abcdb404a00d927fe6308a47fcf09d9519hkuang        best_address - in_what->stride,
1615b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian        best_address - 1,
1616b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian        best_address + 1,
16176ac915abcdb404a00d927fe6308a47fcf09d9519hkuang        best_address + in_what->stride
1618b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian      };
1619ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
16206ac915abcdb404a00d927fe6308a47fcf09d9519hkuang      fn_ptr->sdx4df(what->buf, what->stride, positions, in_what->stride, sads);
1621ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
16226ac915abcdb404a00d927fe6308a47fcf09d9519hkuang      for (j = 0; j < 4; ++j) {
16236ac915abcdb404a00d927fe6308a47fcf09d9519hkuang        if (sads[j] < best_sad) {
16246ac915abcdb404a00d927fe6308a47fcf09d9519hkuang          const MV mv = {ref_mv->row + neighbors[j].row,
16256ac915abcdb404a00d927fe6308a47fcf09d9519hkuang                         ref_mv->col + neighbors[j].col};
16266ac915abcdb404a00d927fe6308a47fcf09d9519hkuang          sads[j] += mvsad_err_cost(&mv, &fcenter_mv,
16275ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang                                         mvjsadcost, mvsadcost, error_per_bit);
1628ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
16296ac915abcdb404a00d927fe6308a47fcf09d9519hkuang          if (sads[j] < best_sad) {
16306ac915abcdb404a00d927fe6308a47fcf09d9519hkuang            best_sad = sads[j];
1631ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            best_site = j;
1632ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang          }
1633ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        }
1634ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      }
1635ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    } else {
16366ac915abcdb404a00d927fe6308a47fcf09d9519hkuang      for (j = 0; j < 4; ++j) {
16376ac915abcdb404a00d927fe6308a47fcf09d9519hkuang        const MV mv = {ref_mv->row + neighbors[j].row,
16386ac915abcdb404a00d927fe6308a47fcf09d9519hkuang                       ref_mv->col + neighbors[j].col};
16396ac915abcdb404a00d927fe6308a47fcf09d9519hkuang
16406ac915abcdb404a00d927fe6308a47fcf09d9519hkuang        if (is_mv_in(x, &mv)) {
16416ac915abcdb404a00d927fe6308a47fcf09d9519hkuang          unsigned int sad = fn_ptr->sdf(what->buf, what->stride,
16426ac915abcdb404a00d927fe6308a47fcf09d9519hkuang                                         get_buf_from_mv(in_what, &mv),
16436ac915abcdb404a00d927fe6308a47fcf09d9519hkuang                                         in_what->stride, best_sad);
16446ac915abcdb404a00d927fe6308a47fcf09d9519hkuang          if (sad < best_sad) {
16456ac915abcdb404a00d927fe6308a47fcf09d9519hkuang            sad += mvsad_err_cost(&mv, &fcenter_mv,
16466ac915abcdb404a00d927fe6308a47fcf09d9519hkuang                                  mvjsadcost, mvsadcost, error_per_bit);
16476ac915abcdb404a00d927fe6308a47fcf09d9519hkuang
16486ac915abcdb404a00d927fe6308a47fcf09d9519hkuang            if (sad < best_sad) {
16496ac915abcdb404a00d927fe6308a47fcf09d9519hkuang              best_sad = sad;
1650ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang              best_site = j;
1651ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            }
1652ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang          }
1653ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        }
1654ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      }
1655ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    }
1656ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
16575ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang    if (best_site == -1) {
1658ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      break;
16595ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang    } else {
1660b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian      ref_mv->row += neighbors[best_site].row;
1661b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian      ref_mv->col += neighbors[best_site].col;
16626ac915abcdb404a00d927fe6308a47fcf09d9519hkuang      best_address = get_buf_from_mv(in_what, ref_mv);
1663ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    }
1664ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  }
1665ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
16666ac915abcdb404a00d927fe6308a47fcf09d9519hkuang  return best_sad;
1667ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang}
1668ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1669b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian// This function is called when we do joint motion search in comp_inter_inter
1670b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian// mode.
1671b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanianint vp9_refining_search_8p_c(const MACROBLOCK *x,
1672b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                             MV *ref_mv, int error_per_bit,
1673b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                             int search_range,
1674b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                             const vp9_variance_fn_ptr_t *fn_ptr,
1675b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                             int *mvjcost, int *mvcost[2],
1676b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                             const MV *center_mv,
1677ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                             const uint8_t *second_pred, int w, int h) {
1678b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  const MV neighbors[8] = {{-1, 0}, {0, -1}, {0, 1}, {1, 0},
1679b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian                           {-1, -1}, {1, -1}, {-1, 1}, {1, 1}};
16806ac915abcdb404a00d927fe6308a47fcf09d9519hkuang  const MACROBLOCKD *const xd = &x->e_mbd;
16816ac915abcdb404a00d927fe6308a47fcf09d9519hkuang  const struct buf_2d *const what = &x->plane[0].src;
16826ac915abcdb404a00d927fe6308a47fcf09d9519hkuang  const struct buf_2d *const in_what = &xd->plane[0].pre[0];
1683b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
1684b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  const int *mvjsadcost = x->nmvjointsadcost;
1685ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
16866ac915abcdb404a00d927fe6308a47fcf09d9519hkuang  unsigned int best_sad = fn_ptr->sdaf(what->buf, what->stride,
16876ac915abcdb404a00d927fe6308a47fcf09d9519hkuang      get_buf_from_mv(in_what, ref_mv), in_what->stride,
16886ac915abcdb404a00d927fe6308a47fcf09d9519hkuang      second_pred, 0x7fffffff) +
1689b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian      mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit);
16906ac915abcdb404a00d927fe6308a47fcf09d9519hkuang  int i, j;
1691ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1692b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian  for (i = 0; i < search_range; ++i) {
1693ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    int best_site = -1;
1694ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
16956ac915abcdb404a00d927fe6308a47fcf09d9519hkuang    for (j = 0; j < 8; ++j) {
16966ac915abcdb404a00d927fe6308a47fcf09d9519hkuang      const MV mv = {ref_mv->row + neighbors[j].row,
16976ac915abcdb404a00d927fe6308a47fcf09d9519hkuang                     ref_mv->col + neighbors[j].col};
1698ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
16996ac915abcdb404a00d927fe6308a47fcf09d9519hkuang      if (is_mv_in(x, &mv)) {
17006ac915abcdb404a00d927fe6308a47fcf09d9519hkuang        unsigned int sad = fn_ptr->sdaf(what->buf, what->stride,
17016ac915abcdb404a00d927fe6308a47fcf09d9519hkuang            get_buf_from_mv(in_what, &mv), in_what->stride,
17026ac915abcdb404a00d927fe6308a47fcf09d9519hkuang            second_pred, best_sad);
17036ac915abcdb404a00d927fe6308a47fcf09d9519hkuang        if (sad < best_sad) {
17046ac915abcdb404a00d927fe6308a47fcf09d9519hkuang          sad += mvsad_err_cost(&mv, &fcenter_mv,
17055ae7ac49f08a179e4f054d99fcfc9dce78d26e58hkuang                                    mvjsadcost, mvsadcost, error_per_bit);
17066ac915abcdb404a00d927fe6308a47fcf09d9519hkuang          if (sad < best_sad) {
17076ac915abcdb404a00d927fe6308a47fcf09d9519hkuang            best_sad = sad;
1708ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            best_site = j;
1709ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang          }
1710ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        }
1711ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      }
1712ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    }
1713ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1714ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    if (best_site == -1) {
1715ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      break;
1716ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    } else {
1717b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian      ref_mv->row += neighbors[best_site].row;
1718b08e2e23eec181e9951df33cd704ac294c5407b6Vignesh Venkatasubramanian      ref_mv->col += neighbors[best_site].col;
1719ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    }
1720ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  }
17216ac915abcdb404a00d927fe6308a47fcf09d9519hkuang  return best_sad;
1722ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang}
1723