// vp9_mcomp.c revision 1184aebb761cbeac9124c37189a80a1a58f04b6b
/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
10ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
11ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang#include <limits.h>
12ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang#include <math.h>
131184aebb761cbeac9124c37189a80a1a58f04b6bhkuang#include <stdio.h>
14ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
15ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang#include "./vpx_config.h"
161184aebb761cbeac9124c37189a80a1a58f04b6bhkuang
171184aebb761cbeac9124c37189a80a1a58f04b6bhkuang#include "vpx_mem/vpx_mem.h"
181184aebb761cbeac9124c37189a80a1a58f04b6bhkuang
19ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang#include "vp9/common/vp9_findnearmv.h"
20ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang#include "vp9/common/vp9_common.h"
21ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
221184aebb761cbeac9124c37189a80a1a58f04b6bhkuang#include "vp9/encoder/vp9_onyx_int.h"
231184aebb761cbeac9124c37189a80a1a58f04b6bhkuang#include "vp9/encoder/vp9_mcomp.h"
241184aebb761cbeac9124c37189a80a1a58f04b6bhkuang
2591037db265ecdd914a26e056cf69207b4f50924ehkuang// #define NEW_DIAMOND_SEARCH
2691037db265ecdd914a26e056cf69207b4f50924ehkuang
271184aebb761cbeac9124c37189a80a1a58f04b6bhkuangvoid vp9_clamp_mv_min_max(MACROBLOCK *x, MV *mv) {
281184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  const int col_min = (mv->col >> 3) - MAX_FULL_PEL_VAL + (mv->col & 7 ? 1 : 0);
291184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  const int row_min = (mv->row >> 3) - MAX_FULL_PEL_VAL + (mv->row & 7 ? 1 : 0);
301184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  const int col_max = (mv->col >> 3) + MAX_FULL_PEL_VAL;
311184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  const int row_max = (mv->row >> 3) + MAX_FULL_PEL_VAL;
32ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
331184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  // Get intersection of UMV window and valid MV window to reduce # of checks
341184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  // in diamond search.
35ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  if (x->mv_col_min < col_min)
36ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    x->mv_col_min = col_min;
37ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  if (x->mv_col_max > col_max)
38ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    x->mv_col_max = col_max;
39ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  if (x->mv_row_min < row_min)
40ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    x->mv_row_min = row_min;
41ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  if (x->mv_row_max > row_max)
42ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    x->mv_row_max = row_max;
43ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang}
44ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
4591037db265ecdd914a26e056cf69207b4f50924ehkuangint vp9_init_search_range(VP9_COMP *cpi, int size) {
46ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int sr = 0;
47ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
4891037db265ecdd914a26e056cf69207b4f50924ehkuang  // Minimum search size no matter what the passed in value.
4991037db265ecdd914a26e056cf69207b4f50924ehkuang  size = MAX(16, size);
5091037db265ecdd914a26e056cf69207b4f50924ehkuang
5191037db265ecdd914a26e056cf69207b4f50924ehkuang  while ((size << sr) < MAX_FULL_PEL_VAL)
52ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    sr++;
53ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
54ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  if (sr)
55ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    sr--;
56ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
5791037db265ecdd914a26e056cf69207b4f50924ehkuang  sr += cpi->sf.reduce_first_step_size;
5891037db265ecdd914a26e056cf69207b4f50924ehkuang  sr = MIN(sr, (cpi->sf.max_step_search_steps - 2));
59ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  return sr;
60ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang}
61ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
62ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuangint vp9_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvjcost, int *mvcost[2],
63f3bed9137f66ef693bd406e43b17e9a1114f1e14hkuang                    int weight) {
64ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  MV v;
65ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  v.row = mv->as_mv.row - ref->as_mv.row;
66ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  v.col = mv->as_mv.col - ref->as_mv.col;
67ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  return ROUND_POWER_OF_TWO((mvjcost[vp9_get_mv_joint(&v)] +
68ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                             mvcost[0][v.row] +
69ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                             mvcost[1][v.col]) * weight, 7);
70ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang}
71ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
72ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuangstatic int mv_err_cost(int_mv *mv, int_mv *ref, int *mvjcost, int *mvcost[2],
73f3bed9137f66ef693bd406e43b17e9a1114f1e14hkuang                       int error_per_bit) {
74ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  if (mvcost) {
75ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    MV v;
76ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    v.row = mv->as_mv.row - ref->as_mv.row;
77ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    v.col = mv->as_mv.col - ref->as_mv.col;
78ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    return ROUND_POWER_OF_TWO((mvjcost[vp9_get_mv_joint(&v)] +
79ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                               mvcost[0][v.row] +
80ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                               mvcost[1][v.col]) * error_per_bit, 13);
81ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  }
82ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  return 0;
83ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang}
84ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
85ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuangstatic int mvsad_err_cost(int_mv *mv, int_mv *ref, int *mvjsadcost,
86ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                          int *mvsadcost[2], int error_per_bit) {
87ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  if (mvsadcost) {
88ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    MV v;
89ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    v.row = mv->as_mv.row - ref->as_mv.row;
90ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    v.col = mv->as_mv.col - ref->as_mv.col;
91ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    return ROUND_POWER_OF_TWO((mvjsadcost[vp9_get_mv_joint(&v)] +
92ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                               mvsadcost[0][v.row] +
93ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                               mvsadcost[1][v.col]) * error_per_bit, 8);
94ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  }
95ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  return 0;
96ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang}
97ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
98ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuangvoid vp9_init_dsmotion_compensation(MACROBLOCK *x, int stride) {
99ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int len;
100ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int search_site_count = 0;
101ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
102ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  // Generate offsets for 4 search sites per step.
103ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  x->ss[search_site_count].mv.col = 0;
104ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  x->ss[search_site_count].mv.row = 0;
105ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  x->ss[search_site_count].offset = 0;
106ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  search_site_count++;
107ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
108ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  for (len = MAX_FIRST_STEP; len > 0; len /= 2) {
109ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    // Compute offsets for search sites.
110ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    x->ss[search_site_count].mv.col = 0;
111ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    x->ss[search_site_count].mv.row = -len;
112ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    x->ss[search_site_count].offset = -len * stride;
113ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    search_site_count++;
114ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
115ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    // Compute offsets for search sites.
116ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    x->ss[search_site_count].mv.col = 0;
117ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    x->ss[search_site_count].mv.row = len;
118ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    x->ss[search_site_count].offset = len * stride;
119ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    search_site_count++;
120ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
121ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    // Compute offsets for search sites.
122ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    x->ss[search_site_count].mv.col = -len;
123ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    x->ss[search_site_count].mv.row = 0;
124ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    x->ss[search_site_count].offset = -len;
125ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    search_site_count++;
126ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
127ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    // Compute offsets for search sites.
128ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    x->ss[search_site_count].mv.col = len;
129ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    x->ss[search_site_count].mv.row = 0;
130ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    x->ss[search_site_count].offset = len;
131ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    search_site_count++;
132ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  }
133ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
134ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  x->ss_count = search_site_count;
135ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  x->searches_per_step = 4;
136ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang}
137ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
138ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuangvoid vp9_init3smotion_compensation(MACROBLOCK *x, int stride) {
139ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int len;
140ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int search_site_count = 0;
141ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
142ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  // Generate offsets for 8 search sites per step.
143ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  x->ss[search_site_count].mv.col = 0;
144ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  x->ss[search_site_count].mv.row = 0;
145ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  x->ss[search_site_count].offset = 0;
146ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  search_site_count++;
147ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
148ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  for (len = MAX_FIRST_STEP; len > 0; len /= 2) {
149ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    // Compute offsets for search sites.
150ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    x->ss[search_site_count].mv.col = 0;
151ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    x->ss[search_site_count].mv.row = -len;
152ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    x->ss[search_site_count].offset = -len * stride;
153ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    search_site_count++;
154ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
155ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    // Compute offsets for search sites.
156ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    x->ss[search_site_count].mv.col = 0;
157ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    x->ss[search_site_count].mv.row = len;
158ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    x->ss[search_site_count].offset = len * stride;
159ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    search_site_count++;
160ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
161ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    // Compute offsets for search sites.
162ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    x->ss[search_site_count].mv.col = -len;
163ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    x->ss[search_site_count].mv.row = 0;
164ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    x->ss[search_site_count].offset = -len;
165ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    search_site_count++;
166ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
167ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    // Compute offsets for search sites.
168ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    x->ss[search_site_count].mv.col = len;
169ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    x->ss[search_site_count].mv.row = 0;
170ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    x->ss[search_site_count].offset = len;
171ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    search_site_count++;
172ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
173ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    // Compute offsets for search sites.
174ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    x->ss[search_site_count].mv.col = -len;
175ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    x->ss[search_site_count].mv.row = -len;
176ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    x->ss[search_site_count].offset = -len * stride - len;
177ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    search_site_count++;
178ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
179ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    // Compute offsets for search sites.
180ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    x->ss[search_site_count].mv.col = len;
181ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    x->ss[search_site_count].mv.row = -len;
182ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    x->ss[search_site_count].offset = -len * stride + len;
183ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    search_site_count++;
184ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
185ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    // Compute offsets for search sites.
186ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    x->ss[search_site_count].mv.col = -len;
187ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    x->ss[search_site_count].mv.row = len;
188ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    x->ss[search_site_count].offset = len * stride - len;
189ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    search_site_count++;
190ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
191ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    // Compute offsets for search sites.
192ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    x->ss[search_site_count].mv.col = len;
193ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    x->ss[search_site_count].mv.row = len;
194ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    x->ss[search_site_count].offset = len * stride + len;
195ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    search_site_count++;
196ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  }
197ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
198ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  x->ss_count = search_site_count;
199ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  x->searches_per_step = 8;
200ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang}
201ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
/*
 * To avoid the penalty for cache-line-crossing reads, preload the reference
 * area into a small buffer that is aligned so that reads from it never cross
 * a cache line. This reduces the CPU cycles spent on reading ref data in the
 * sub-pixel filter functions.
 * TODO: Currently, since the sub-pixel search range here is -3 ~ 3, copy a
 * 22 rows x 32 cols area, which is enough for a 16x16 macroblock. Later, for
 * SPLITMV, we could reduce the area.
 */
211ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
/*
 * Estimated cost of a motion vector (r, c).
 *
 * Evaluates to 0 when mvcost is NULL.  Otherwise sums one mvjcost entry
 * (index 2 * (r != rr) + (c != rc)) with the mvcost[0] / mvcost[1] entries
 * for the differences (r - rr) and (c - rc), multiplies by error_per_bit and
 * shifts right by 13 after adding 4096 (half of 1 << 13) for rounding.
 *
 * Expects mvcost, mvjcost, rr, rc and error_per_bit to be in scope where the
 * macro is used.
 */
#define MVC(r, c)                                       \
    (mvcost ?                                           \
     ((mvjcost[((r) != rr) * 2 + ((c) != rc)] +         \
       mvcost[0][((r) - rr)] + mvcost[1][((c) - rc)]) * \
      error_per_bit + 4096) >> 13 : 0)
218ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
219ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
// Convert a motion vector component to the offset used for the svf calc:
// the low three bits of the component, doubled.
#define SP(x) (((x) & 7) << 1)
222ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
/*
 * Runs statement block s when (r, c) lies inside the inclusive window
 * [minr, maxr] x [minc, maxc], and statement e otherwise.
 *
 * Expands to a bare if/else, so s must be a braced block and the macro
 * should only be used where a statement is expected.  Expects minc, maxc,
 * minr and maxr to be in scope.
 */
#define IFMVCV(r, c, s, e)                                        \
    if ((c) >= minc && (c) <= maxc && (r) >= minr && (r) <= maxr) \
      s                                                           \
    else                                                          \
      e;
228ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
/*
 * Pointer to the predictor base of a motion vector: y advanced by the
 * full-pel part of (r, c) (each component >> 3, rows scaled by y_stride)
 * minus offset.  Expects y, y_stride and offset to be in scope.
 */
#define PRE(r, c) (y + (((r) >> 3) * y_stride + ((c) >> 3) -(offset)))
231ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
/*
 * Sub-pixel variance error at (r, c): calls vfp->svf on the predictor at
 * PRE(r, c) with the sub-pixel offsets SP(c), SP(r) against the source
 * block z.  The call also receives &sse.  Expects vfp, y_stride, z,
 * src_stride and sse (plus everything PRE needs) to be in scope.
 */
#define DIST(r, c) \
    vfp->svf(PRE(r, c), y_stride, SP(c), SP(r), z, src_stride, &sse)
235ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
/*
 * Checks whether (r, c) has a better score than the previous best.
 *
 * If (r, c) is inside the allowed window (see IFMVCV), v is set to
 * MVC(r, c) + DIST(r, c).  When that is below besterr, the macro records it:
 * besterr, br, bc, *distortion and *sse1 are all updated.  If (r, c) is
 * outside the window, v is set to INT_MAX and nothing else changes.
 *
 * Expects thismse, besterr, br, bc, distortion, sse1 and sse to be in scope,
 * in addition to what MVC, DIST and IFMVCV need.
 */
#define CHECK_BETTER(v, r, c) \
    IFMVCV(r, c, {                                                       \
      thismse = (DIST(r, c));                                            \
      if ((v = MVC(r, c) + thismse) < besterr) {                         \
        besterr = v;                                                     \
        br = (r);                                                        \
        bc = (c);                                                        \
        *distortion = thismse;                                           \
        *sse1 = sse;                                                     \
      }                                                                  \
    },                                                                   \
    v = INT_MAX;)
249ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
/*
 * One refinement pass around (tr, tc) with step hstep.
 *
 * Scores the left, right, up and down neighbours with CHECK_BETTER, then
 * scores a single diagonal neighbour chosen from the cheaper side on each
 * axis.  whichdir records that choice: bit 0 is set when left is not cheaper
 * than right, and bit 1 is set when up is not cheaper than down.
 *
 * Any improvement is recorded by CHECK_BETTER in br / bc / besterr.
 * Expects tr, tc, hstep and whichdir to be in scope.
 */
#define FIRST_LEVEL_CHECKS                              \
  {                                                     \
    unsigned int left, right, up, down, diag;           \
    CHECK_BETTER(left, tr, tc - hstep);                 \
    CHECK_BETTER(right, tr, tc + hstep);                \
    CHECK_BETTER(up, tr - hstep, tc);                   \
    CHECK_BETTER(down, tr + hstep, tc);                 \
    whichdir = (left < right ? 0 : 1) +                 \
               (up < down ? 0 : 2);                     \
    switch (whichdir) {                                 \
      case 0:                                           \
        CHECK_BETTER(diag, tr - hstep, tc - hstep);     \
        break;                                          \
      case 1:                                           \
        CHECK_BETTER(diag, tr - hstep, tc + hstep);     \
        break;                                          \
      case 2:                                           \
        CHECK_BETTER(diag, tr + hstep, tc - hstep);     \
        break;                                          \
      case 3:                                           \
        CHECK_BETTER(diag, tr + hstep, tc + hstep);     \
        break;                                          \
    }                                                   \
  }
2741184aebb761cbeac9124c37189a80a1a58f04b6bhkuang
/*
 * Extra probes made when the best point (br, bc) differs from the pass
 * centre (tr, tc).  kr / kc are the row / column displacement from the
 * centre to the best point at the time they are read.
 *
 *  - Both components differ: probes (tr + kr, tc + 2 * kc) and
 *    (tr + 2 * kr, tc + kc).
 *  - Only the column differs: probes (tr +/- hstep, tc + 2 * kc), then one
 *    of (tr +/- hstep, tc + kc) selected by whichdir.
 *  - Only the row differs: probes (tr + 2 * kr, tc +/- hstep), then one of
 *    (tr + kr, tc +/- hstep) selected by whichdir.
 *
 * Nothing is probed when (br, bc) equals (tr, tc).  Expects tr, tc, br, bc,
 * hstep and whichdir to be in scope, plus everything CHECK_BETTER needs.
 */
#define SECOND_LEVEL_CHECKS                             \
  {                                                     \
    int kr, kc;                                         \
    unsigned int second;                                \
    if (tr != br && tc != bc) {                         \
      kr = br - tr;                                     \
      kc = bc - tc;                                     \
      CHECK_BETTER(second, tr + kr, tc + 2 * kc);       \
      CHECK_BETTER(second, tr + 2 * kr, tc + kc);       \
    } else if (tr == br && tc != bc) {                  \
      kc = bc - tc;                                     \
      CHECK_BETTER(second, tr + hstep, tc + 2 * kc);    \
      CHECK_BETTER(second, tr - hstep, tc + 2 * kc);    \
      switch (whichdir) {                               \
        case 0:                                         \
        case 1:                                         \
          CHECK_BETTER(second, tr + hstep, tc + kc);    \
          break;                                        \
        case 2:                                         \
        case 3:                                         \
          CHECK_BETTER(second, tr - hstep, tc + kc);    \
          break;                                        \
      }                                                 \
    } else if (tr != br && tc == bc) {                  \
      kr = br - tr;                                     \
      CHECK_BETTER(second, tr + 2 * kr, tc + hstep);    \
      CHECK_BETTER(second, tr + 2 * kr, tc - hstep);    \
      switch (whichdir) {                               \
        case 0:                                         \
        case 2:                                         \
          CHECK_BETTER(second, tr + kr, tc + hstep);    \
          break;                                        \
        case 1:                                         \
        case 3:                                         \
          CHECK_BETTER(second, tr + kr, tc - hstep);    \
          break;                                        \
      }                                                 \
    }                                                   \
  }
3141184aebb761cbeac9124c37189a80a1a58f04b6bhkuang
3151184aebb761cbeac9124c37189a80a1a58f04b6bhkuangint vp9_find_best_sub_pixel_iterative(MACROBLOCK *x,
3161184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                                      int_mv *bestmv, int_mv *ref_mv,
3171184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                                      int error_per_bit,
3181184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                                      const vp9_variance_fn_ptr_t *vfp,
3191184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                                      int forced_stop,
3201184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                                      int iters_per_step,
3211184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                                      int *mvjcost, int *mvcost[2],
3221184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                                      int *distortion,
3231184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                                      unsigned int *sse1) {
324ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  uint8_t *z = x->plane[0].src.buf;
325ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int src_stride = x->plane[0].src.stride;
326ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  MACROBLOCKD *xd = &x->e_mbd;
327ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
328ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  unsigned int besterr = INT_MAX;
329ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  unsigned int sse;
330ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  unsigned int whichdir;
3311184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  unsigned int halfiters = iters_per_step;
3321184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  unsigned int quarteriters = iters_per_step;
3331184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  unsigned int eighthiters = iters_per_step;
334ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int thismse;
335ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
336ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  uint8_t *y = xd->plane[0].pre[0].buf +
337ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang               (bestmv->as_mv.row) * xd->plane[0].pre[0].stride +
338ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang               bestmv->as_mv.col;
339ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
3401184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  const int y_stride = xd->plane[0].pre[0].stride;
341ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
3421184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  int rr = ref_mv->as_mv.row;
3431184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  int rc = ref_mv->as_mv.col;
3441184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  int br = bestmv->as_mv.row << 3;
3451184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  int bc = bestmv->as_mv.col << 3;
3461184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  int hstep = 4;
3471184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  const int minc = MAX(x->mv_col_min << 3, ref_mv->as_mv.col - MV_MAX);
3481184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  const int maxc = MIN(x->mv_col_max << 3, ref_mv->as_mv.col + MV_MAX);
3491184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  const int minr = MAX(x->mv_row_min << 3, ref_mv->as_mv.row - MV_MAX);
3501184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  const int maxr = MIN(x->mv_row_max << 3, ref_mv->as_mv.row + MV_MAX);
351ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
3521184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  int tr = br;
3531184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  int tc = bc;
354ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
3551184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  const int offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col;
356ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
357ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  // central mv
358ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  bestmv->as_mv.row <<= 3;
359ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  bestmv->as_mv.col <<= 3;
360ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
361ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  // calculate central point error
362ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  besterr = vfp->vf(y, y_stride, z, src_stride, sse1);
363ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  *distortion = besterr;
364f3bed9137f66ef693bd406e43b17e9a1114f1e14hkuang  besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
365ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
366ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  // TODO: Each subsequent iteration checks at least one point in
367ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  // common with the last iteration could be 2 ( if diag selected)
3681184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  while (halfiters--) {
369ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    // 1/2 pel
3701184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    FIRST_LEVEL_CHECKS;
371ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    // no reason to check the same one again.
372ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    if (tr == br && tc == bc)
373ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      break;
374ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    tr = br;
375ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    tc = bc;
376ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  }
377ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
378ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  // TODO: Each subsequent iteration checks at least one point in common with
379ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  // the last iteration could be 2 ( if diag selected) 1/4 pel
3801184aebb761cbeac9124c37189a80a1a58f04b6bhkuang
3811184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
3821184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  if (forced_stop != 2) {
3831184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    hstep >>= 1;
3841184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    while (quarteriters--) {
3851184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      FIRST_LEVEL_CHECKS;
3861184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      // no reason to check the same one again.
3871184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      if (tr == br && tc == bc)
388ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        break;
3891184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      tr = br;
3901184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      tc = bc;
391ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    }
392ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  }
393ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
3941184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  if (xd->allow_high_precision_mv && vp9_use_mv_hp(&ref_mv->as_mv) &&
3951184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      forced_stop == 0) {
396ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    hstep >>= 1;
3971184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    while (eighthiters--) {
3981184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      FIRST_LEVEL_CHECKS;
399ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      // no reason to check the same one again.
400ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      if (tr == br && tc == bc)
401ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        break;
402ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      tr = br;
403ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      tc = bc;
404ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    }
405ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  }
4061184aebb761cbeac9124c37189a80a1a58f04b6bhkuang
407ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  bestmv->as_mv.row = br;
408ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  bestmv->as_mv.col = bc;
409ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
410ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > (MAX_FULL_PEL_VAL << 3)) ||
411ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      (abs(bestmv->as_mv.row - ref_mv->as_mv.row) > (MAX_FULL_PEL_VAL << 3)))
412ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    return INT_MAX;
413ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
414ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  return besterr;
415ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang}
416ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
4171184aebb761cbeac9124c37189a80a1a58f04b6bhkuangint vp9_find_best_sub_pixel_tree(MACROBLOCK *x,
418ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                                 int_mv *bestmv, int_mv *ref_mv,
419ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                                 int error_per_bit,
420ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                                 const vp9_variance_fn_ptr_t *vfp,
4211184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                                 int forced_stop,
4221184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                                 int iters_per_step,
423ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                                 int *mvjcost, int *mvcost[2],
424ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                                 int *distortion,
4251184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                                 unsigned int *sse1) {
426ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  uint8_t *z = x->plane[0].src.buf;
427ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int src_stride = x->plane[0].src.stride;
428ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  MACROBLOCKD *xd = &x->e_mbd;
429ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int rr, rc, br, bc, hstep;
430ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int tr, tc;
431ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  unsigned int besterr = INT_MAX;
432ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  unsigned int sse;
433ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  unsigned int whichdir;
434ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int thismse;
435ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int maxc, minc, maxr, minr;
436ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int y_stride;
437ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int offset;
4381184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  unsigned int halfiters = iters_per_step;
4391184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  unsigned int quarteriters = iters_per_step;
4401184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  unsigned int eighthiters = iters_per_step;
441ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
442ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  uint8_t *y = xd->plane[0].pre[0].buf +
443ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang               (bestmv->as_mv.row) * xd->plane[0].pre[0].stride +
444ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang               bestmv->as_mv.col;
445ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
446ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  y_stride = xd->plane[0].pre[0].stride;
447ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
448ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  rr = ref_mv->as_mv.row;
449ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  rc = ref_mv->as_mv.col;
450ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  br = bestmv->as_mv.row << 3;
451ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  bc = bestmv->as_mv.col << 3;
452ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  hstep = 4;
4531184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  minc = MAX(x->mv_col_min << 3,
4541184aebb761cbeac9124c37189a80a1a58f04b6bhkuang             (ref_mv->as_mv.col) - ((1 << MV_MAX_BITS) - 1));
4551184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  maxc = MIN(x->mv_col_max << 3,
4561184aebb761cbeac9124c37189a80a1a58f04b6bhkuang             (ref_mv->as_mv.col) + ((1 << MV_MAX_BITS) - 1));
4571184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  minr = MAX(x->mv_row_min << 3,
4581184aebb761cbeac9124c37189a80a1a58f04b6bhkuang             (ref_mv->as_mv.row) - ((1 << MV_MAX_BITS) - 1));
4591184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  maxr = MIN(x->mv_row_max << 3,
4601184aebb761cbeac9124c37189a80a1a58f04b6bhkuang             (ref_mv->as_mv.row) + ((1 << MV_MAX_BITS) - 1));
461ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
462ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  tr = br;
463ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  tc = bc;
464ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
465ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col;
466ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
467ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  // central mv
468ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  bestmv->as_mv.row <<= 3;
469ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  bestmv->as_mv.col <<= 3;
470ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
471ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  // calculate central point error
4721184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  besterr = vfp->vf(y, y_stride, z, src_stride, sse1);
473ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  *distortion = besterr;
474f3bed9137f66ef693bd406e43b17e9a1114f1e14hkuang  besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
475ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
4761184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  // 1/2 pel
4771184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  FIRST_LEVEL_CHECKS;
4781184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  if (halfiters > 1) {
4791184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    SECOND_LEVEL_CHECKS;
4801184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  }
4811184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  tr = br;
4821184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  tc = bc;
483ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
4841184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
4851184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  if (forced_stop != 2) {
4861184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    hstep >>= 1;
4871184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    FIRST_LEVEL_CHECKS;
4881184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    if (quarteriters > 1) {
4891184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      SECOND_LEVEL_CHECKS;
490ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    }
491ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    tr = br;
492ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    tc = bc;
493ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  }
494ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
4951184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  if (xd->allow_high_precision_mv && vp9_use_mv_hp(&ref_mv->as_mv) &&
4961184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      forced_stop == 0) {
4971184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    hstep >>= 1;
4981184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    FIRST_LEVEL_CHECKS;
4991184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    if (eighthiters > 1) {
5001184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      SECOND_LEVEL_CHECKS;
501ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    }
502ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    tr = br;
503ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    tc = bc;
504ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  }
505ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
506ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  bestmv->as_mv.row = br;
507ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  bestmv->as_mv.col = bc;
508ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
509ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > (MAX_FULL_PEL_VAL << 3)) ||
510ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      (abs(bestmv->as_mv.row - ref_mv->as_mv.row) > (MAX_FULL_PEL_VAL << 3)))
511ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    return INT_MAX;
512ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
513ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  return besterr;
514ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang}
515ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
#undef DIST
/* Redefines DIST for the compound-prediction searches that follow: the
 * variance call goes through vfp->svaf and is additionally handed
 * second_pred. Expects vfp, y_stride, z, src_stride, sse and second_pred to
 * be in scope at the expansion site. */
#define DIST(r, c)                                  \
    vfp->svaf(PRE(r, c), y_stride, SP(c), SP(r), z, \
              src_stride, &sse, second_pred)
521ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
5221184aebb761cbeac9124c37189a80a1a58f04b6bhkuangint vp9_find_best_sub_pixel_comp_iterative(MACROBLOCK *x,
5231184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                                           int_mv *bestmv, int_mv *ref_mv,
5241184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                                           int error_per_bit,
5251184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                                           const vp9_variance_fn_ptr_t *vfp,
5261184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                                           int forced_stop,
5271184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                                           int iters_per_step,
5281184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                                           int *mvjcost, int *mvcost[2],
5291184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                                           int *distortion,
5301184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                                           unsigned int *sse1,
5311184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                                           const uint8_t *second_pred,
5321184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                                           int w, int h) {
5331184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  uint8_t *const z = x->plane[0].src.buf;
5341184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  const int src_stride = x->plane[0].src.stride;
5351184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  MACROBLOCKD *const xd = &x->e_mbd;
5361184aebb761cbeac9124c37189a80a1a58f04b6bhkuang
5371184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  unsigned int besterr = INT_MAX;
538ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  unsigned int sse;
5391184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  unsigned int whichdir;
5401184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  unsigned int halfiters = iters_per_step;
5411184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  unsigned int quarteriters = iters_per_step;
5421184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  unsigned int eighthiters = iters_per_step;
543ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int thismse;
544ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
5451184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64);
5461184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  uint8_t *const y = xd->plane[0].pre[0].buf +
547ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang               (bestmv->as_mv.row) * xd->plane[0].pre[0].stride +
548ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang               bestmv->as_mv.col;
549ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
5501184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  const int y_stride = xd->plane[0].pre[0].stride;
551ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
5521184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  int rr = ref_mv->as_mv.row;
5531184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  int rc = ref_mv->as_mv.col;
5541184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  int br = bestmv->as_mv.row << 3;
5551184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  int bc = bestmv->as_mv.col << 3;
5561184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  int hstep = 4;
5571184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  const int minc = MAX(x->mv_col_min << 3, ref_mv->as_mv.col - MV_MAX);
5581184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  const int maxc = MIN(x->mv_col_max << 3, ref_mv->as_mv.col + MV_MAX);
5591184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  const int minr = MAX(x->mv_row_min << 3, ref_mv->as_mv.row - MV_MAX);
5601184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  const int maxr = MIN(x->mv_row_max << 3, ref_mv->as_mv.row + MV_MAX);
561ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
5621184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  int tr = br;
5631184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  int tc = bc;
564ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
5651184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  const int offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col;
566ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
5671184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  // central mv
5681184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  bestmv->as_mv.row <<= 3;
5691184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  bestmv->as_mv.col <<= 3;
570ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
5711184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  // calculate central point error
5721184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  // TODO(yunqingwang): central pointer error was already calculated in full-
5731184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  // pixel search, and can be passed in this function.
5741184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  comp_avg_pred(comp_pred, second_pred, w, h, y, y_stride);
5751184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  besterr = vfp->vf(comp_pred, w, z, src_stride, sse1);
5761184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  *distortion = besterr;
5771184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
578ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
5791184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  // Each subsequent iteration checks at least one point in
5801184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  // common with the last iteration could be 2 ( if diag selected)
5811184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  while (halfiters--) {
5821184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    // 1/2 pel
5831184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    FIRST_LEVEL_CHECKS;
5841184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    // no reason to check the same one again.
5851184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    if (tr == br && tc == bc)
586ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      break;
5871184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    tr = br;
5881184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    tc = bc;
589ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  }
590ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
5911184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  // Each subsequent iteration checks at least one point in common with
5921184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  // the last iteration could be 2 ( if diag selected) 1/4 pel
593ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
5941184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
5951184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  if (forced_stop != 2) {
5961184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    hstep >>= 1;
5971184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    while (quarteriters--) {
5981184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      FIRST_LEVEL_CHECKS;
5991184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      // no reason to check the same one again.
6001184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      if (tr == br && tc == bc)
6011184aebb761cbeac9124c37189a80a1a58f04b6bhkuang        break;
6021184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      tr = br;
6031184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      tc = bc;
6041184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    }
605ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  }
606ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
6071184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  if (xd->allow_high_precision_mv && vp9_use_mv_hp(&ref_mv->as_mv) &&
6081184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      forced_stop == 0) {
6091184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    hstep >>= 1;
6101184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    while (eighthiters--) {
6111184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      FIRST_LEVEL_CHECKS;
6121184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      // no reason to check the same one again.
6131184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      if (tr == br && tc == bc)
6141184aebb761cbeac9124c37189a80a1a58f04b6bhkuang        break;
6151184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      tr = br;
6161184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      tc = bc;
6171184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    }
618ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  }
6191184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  bestmv->as_mv.row = br;
6201184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  bestmv->as_mv.col = bc;
621ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
6221184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > (MAX_FULL_PEL_VAL << 3)) ||
6231184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      (abs(bestmv->as_mv.row - ref_mv->as_mv.row) > (MAX_FULL_PEL_VAL << 3)))
6241184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    return INT_MAX;
625ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
6261184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  return besterr;
627ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang}
628ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
6291184aebb761cbeac9124c37189a80a1a58f04b6bhkuangint vp9_find_best_sub_pixel_comp_tree(MACROBLOCK *x,
6301184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                                      int_mv *bestmv, int_mv *ref_mv,
6311184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                                      int error_per_bit,
6321184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                                      const vp9_variance_fn_ptr_t *vfp,
6331184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                                      int forced_stop,
6341184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                                      int iters_per_step,
6351184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                                      int *mvjcost, int *mvcost[2],
6361184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                                      int *distortion,
6371184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                                      unsigned int *sse1,
6381184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                                      const uint8_t *second_pred,
6391184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                                      int w, int h) {
640ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  uint8_t *z = x->plane[0].src.buf;
641ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int src_stride = x->plane[0].src.stride;
6421184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  MACROBLOCKD *xd = &x->e_mbd;
6431184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  int rr, rc, br, bc, hstep;
6441184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  int tr, tc;
6451184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  unsigned int besterr = INT_MAX;
646ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  unsigned int sse;
6471184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  unsigned int whichdir;
648ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int thismse;
6491184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  int maxc, minc, maxr, minr;
650ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int y_stride;
6511184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  int offset;
6521184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  unsigned int halfiters = iters_per_step;
6531184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  unsigned int quarteriters = iters_per_step;
6541184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  unsigned int eighthiters = iters_per_step;
655ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
6561184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64);
657ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  uint8_t *y = xd->plane[0].pre[0].buf +
6581184aebb761cbeac9124c37189a80a1a58f04b6bhkuang               (bestmv->as_mv.row) * xd->plane[0].pre[0].stride +
6591184aebb761cbeac9124c37189a80a1a58f04b6bhkuang               bestmv->as_mv.col;
6601184aebb761cbeac9124c37189a80a1a58f04b6bhkuang
661ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  y_stride = xd->plane[0].pre[0].stride;
662ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
6631184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  rr = ref_mv->as_mv.row;
6641184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  rc = ref_mv->as_mv.col;
6651184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  br = bestmv->as_mv.row << 3;
6661184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  bc = bestmv->as_mv.col << 3;
6671184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  hstep = 4;
6681184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  minc = MAX(x->mv_col_min << 3, (ref_mv->as_mv.col) -
6691184aebb761cbeac9124c37189a80a1a58f04b6bhkuang             ((1 << MV_MAX_BITS) - 1));
6701184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  maxc = MIN(x->mv_col_max << 3, (ref_mv->as_mv.col) +
6711184aebb761cbeac9124c37189a80a1a58f04b6bhkuang             ((1 << MV_MAX_BITS) - 1));
6721184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  minr = MAX(x->mv_row_min << 3, (ref_mv->as_mv.row) -
6731184aebb761cbeac9124c37189a80a1a58f04b6bhkuang             ((1 << MV_MAX_BITS) - 1));
6741184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  maxr = MIN(x->mv_row_max << 3, (ref_mv->as_mv.row) +
6751184aebb761cbeac9124c37189a80a1a58f04b6bhkuang             ((1 << MV_MAX_BITS) - 1));
6761184aebb761cbeac9124c37189a80a1a58f04b6bhkuang
6771184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  tr = br;
6781184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  tc = bc;
6791184aebb761cbeac9124c37189a80a1a58f04b6bhkuang
6801184aebb761cbeac9124c37189a80a1a58f04b6bhkuang
6811184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col;
6821184aebb761cbeac9124c37189a80a1a58f04b6bhkuang
683ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  // central mv
684ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  bestmv->as_mv.row <<= 3;
685ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  bestmv->as_mv.col <<= 3;
686ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
687ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  // calculate central point error
6881184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  // TODO(yunqingwang): central pointer error was already calculated in full-
6891184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  // pixel search, and can be passed in this function.
6901184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  comp_avg_pred(comp_pred, second_pred, w, h, y, y_stride);
6911184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  besterr = vfp->vf(comp_pred, w, z, src_stride, sse1);
6921184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  *distortion = besterr;
6931184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
694ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
6951184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  // Each subsequent iteration checks at least one point in
6961184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  // common with the last iteration could be 2 ( if diag selected)
6971184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  // 1/2 pel
6981184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  FIRST_LEVEL_CHECKS;
6991184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  if (halfiters > 1) {
7001184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    SECOND_LEVEL_CHECKS;
701ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  }
7021184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  tr = br;
7031184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  tc = bc;
704ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
7051184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  // Each subsequent iteration checks at least one point in common with
7061184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  // the last iteration could be 2 ( if diag selected) 1/4 pel
707ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
7081184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
7091184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  if (forced_stop != 2) {
7101184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    hstep >>= 1;
7111184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    FIRST_LEVEL_CHECKS;
7121184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    if (quarteriters > 1) {
7131184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      SECOND_LEVEL_CHECKS;
7141184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    }
7151184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    tr = br;
7161184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    tc = bc;
717ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  }
718ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
7191184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  if (xd->allow_high_precision_mv && vp9_use_mv_hp(&ref_mv->as_mv) &&
7201184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      forced_stop == 0) {
7211184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    hstep >>= 1;
7221184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    FIRST_LEVEL_CHECKS;
7231184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    if (eighthiters > 1) {
7241184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      SECOND_LEVEL_CHECKS;
7251184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    }
7261184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    tr = br;
7271184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    tc = bc;
728ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  }
7291184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  bestmv->as_mv.row = br;
7301184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  bestmv->as_mv.col = bc;
731ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
7321184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > (MAX_FULL_PEL_VAL << 3)) ||
7331184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      (abs(bestmv->as_mv.row - ref_mv->as_mv.row) > (MAX_FULL_PEL_VAL << 3)))
7341184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    return INT_MAX;
735ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
7361184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  return besterr;
737ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang}
738ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
7391184aebb761cbeac9124c37189a80a1a58f04b6bhkuang#undef MVC
7401184aebb761cbeac9124c37189a80a1a58f04b6bhkuang#undef PRE
7411184aebb761cbeac9124c37189a80a1a58f04b6bhkuang#undef DIST
7421184aebb761cbeac9124c37189a80a1a58f04b6bhkuang#undef IFMVCV
7431184aebb761cbeac9124c37189a80a1a58f04b6bhkuang#undef CHECK_BETTER
7441184aebb761cbeac9124c37189a80a1a58f04b6bhkuang#undef SP
7451184aebb761cbeac9124c37189a80a1a58f04b6bhkuang
// Sets all_in to 1 when (br, bc) can move by `range` in every direction
// without leaving the row/column limits stored in x, and to 0 otherwise.
// Expects all_in, br, bc and x to be in scope at the expansion site.
// `range` is parenthesized so that an expression argument such as (a + b)
// is subtracted and added as a whole; it is evaluated more than once.
#define CHECK_BOUNDS(range) \
  {\
    all_in = 1;\
    all_in &= ((br - (range)) >= x->mv_row_min);\
    all_in &= ((br + (range)) <= x->mv_row_max);\
    all_in &= ((bc - (range)) >= x->mv_col_min);\
    all_in &= ((bc + (range)) <= x->mv_col_max);\
  }
754ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
// Skips to the next iteration of the enclosing loop when this_mv lies outside
// the row/column limits stored in x. Must be expanded directly inside a loop
// body: the `continue` has to bind to the caller's loop, which is why this is
// a bare brace block and not a do/while wrapper.
#define CHECK_POINT \
  {\
    if (this_mv.as_mv.col < x->mv_col_min || \
        this_mv.as_mv.col > x->mv_col_max || \
        this_mv.as_mv.row < x->mv_row_min || \
        this_mv.as_mv.row > x->mv_row_max) \
      continue;\
  }
762ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
// Candidate test: when thissad beats bestsad, optionally adds the vector cost
// from mvsad_err_cost() to thissad (only if use_mvcost is non-zero) and, if
// it still beats bestsad, records it in bestsad and stores i in best_site.
// thissad keeps the added cost afterwards. Expects thissad, bestsad,
// use_mvcost, this_mv, fcenter_mv, mvjsadcost, mvsadcost, sad_per_bit,
// best_site and i to be in scope at the expansion site.
#define CHECK_BETTER \
  {\
    if (thissad < bestsad) {\
      thissad += use_mvcost ? \
          mvsad_err_cost(&this_mv, &fcenter_mv, mvjsadcost, mvsadcost, \
                         sad_per_bit) : 0;\
      if (thissad < bestsad) {\
        bestsad = thissad;\
        best_site = i;\
      }\
    }\
  }
778ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
// Fills list[0..2] with the indices of the three candidates to examine next
// on a ring of n candidates: the predecessor of i, i itself and the
// successor of i, wrapping around at both ends of the ring.
// Wrapped in do { } while (0) so that the macro behaves as one statement
// (e.g. under an unbraced if). The arguments i and n are evaluated more than
// once, so they must not have side effects.
#define get_next_chkpts(list, i, n)                 \
  do {                                              \
    (list)[0] = ((i) == 0 ? (n) - 1 : (i) - 1);     \
    (list)[1] = (i);                                \
    (list)[2] = ((i) == (n) - 1 ? 0 : (i) + 1);     \
  } while (0)
7831184aebb761cbeac9124c37189a80a1a58f04b6bhkuang
// Number of scales described by the pattern tables.
#define MAX_PATTERN_SCALES         11
// Maximum number of candidates per scale.
#define MAX_PATTERN_CANDIDATES      8
// Number of refinement candidates examined around the last best candidate.
#define PATTERN_CANDIDATES_REF      3
7871184aebb761cbeac9124c37189a80a1a58f04b6bhkuang
7881184aebb761cbeac9124c37189a80a1a58f04b6bhkuang// Generic pattern search function that searches over multiple scales.
7891184aebb761cbeac9124c37189a80a1a58f04b6bhkuang// Each scale can have a different number of candidates and shape of
7901184aebb761cbeac9124c37189a80a1a58f04b6bhkuang// candidates as indicated in the num_candidates and candidates arrays
7911184aebb761cbeac9124c37189a80a1a58f04b6bhkuang// passed into this function
7921184aebb761cbeac9124c37189a80a1a58f04b6bhkuangstatic int vp9_pattern_search(MACROBLOCK *x,
7931184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                              int_mv *ref_mv,
7941184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                              int search_param,
7951184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                              int sad_per_bit,
7961184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                              int do_init_search,
7971184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                              int do_refine,
7981184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                              const vp9_variance_fn_ptr_t *vfp,
7991184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                              int use_mvcost,
8001184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                              int_mv *center_mv, int_mv *best_mv,
8011184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                              const int num_candidates[MAX_PATTERN_SCALES],
8021184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                              const MV candidates[MAX_PATTERN_SCALES]
8031184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                                                 [MAX_PATTERN_CANDIDATES]) {
804ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  const MACROBLOCKD* const xd = &x->e_mbd;
8051184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  static const int search_param_to_steps[MAX_MVSEARCH_STEPS] = {
8061184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
8071184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  };
8081184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  int i, j, s, t;
809ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  uint8_t *what = x->plane[0].src.buf;
810ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int what_stride = x->plane[0].src.stride;
811ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int in_what_stride = xd->plane[0].pre[0].stride;
812ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int br, bc;
813ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int_mv this_mv;
8141184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  int bestsad = INT_MAX;
8151184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  int thissad;
816ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  uint8_t *base_offset;
817ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  uint8_t *this_offset;
818ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int k = -1;
819ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int all_in;
820ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int best_site = -1;
821ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int_mv fcenter_mv;
8221184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  int best_init_s = search_param_to_steps[search_param];
8231184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  int *mvjsadcost = x->nmvjointsadcost;
8241184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
8251184aebb761cbeac9124c37189a80a1a58f04b6bhkuang
826ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
827ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
828ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
829ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  // adjust ref_mv to make sure it is within MV range
830f3bed9137f66ef693bd406e43b17e9a1114f1e14hkuang  clamp_mv(&ref_mv->as_mv,
831f3bed9137f66ef693bd406e43b17e9a1114f1e14hkuang           x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
832ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  br = ref_mv->as_mv.row;
833ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  bc = ref_mv->as_mv.col;
834ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
835ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  // Work out the start point for the search
836ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  base_offset = (uint8_t *)(xd->plane[0].pre[0].buf);
8371184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  this_offset = base_offset + (br * in_what_stride) + bc;
838ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  this_mv.as_mv.row = br;
839ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  this_mv.as_mv.col = bc;
840ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  bestsad = vfp->sdf(what, what_stride, this_offset,
841ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                     in_what_stride, 0x7fffffff)
842ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            + mvsad_err_cost(&this_mv, &fcenter_mv, mvjsadcost, mvsadcost,
843ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                             sad_per_bit);
844ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
8451184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  // Search all possible scales upto the search param around the center point
8461184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  // pick the scale of the point that is best as the starting scale of
8471184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  // further steps around it.
8481184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  if (do_init_search) {
8491184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    s = best_init_s;
8501184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    best_init_s = -1;
8511184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    for (t = 0; t <= s; ++t) {
8521184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      best_site = -1;
8531184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      CHECK_BOUNDS((1 << t))
8541184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      if (all_in) {
8551184aebb761cbeac9124c37189a80a1a58f04b6bhkuang        for (i = 0; i < num_candidates[t]; i++) {
8561184aebb761cbeac9124c37189a80a1a58f04b6bhkuang          this_mv.as_mv.row = br + candidates[t][i].row;
8571184aebb761cbeac9124c37189a80a1a58f04b6bhkuang          this_mv.as_mv.col = bc + candidates[t][i].col;
8581184aebb761cbeac9124c37189a80a1a58f04b6bhkuang          this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) +
8591184aebb761cbeac9124c37189a80a1a58f04b6bhkuang              this_mv.as_mv.col;
8601184aebb761cbeac9124c37189a80a1a58f04b6bhkuang          thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride,
8611184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                             bestsad);
8621184aebb761cbeac9124c37189a80a1a58f04b6bhkuang          CHECK_BETTER
8631184aebb761cbeac9124c37189a80a1a58f04b6bhkuang        }
8641184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      } else {
8651184aebb761cbeac9124c37189a80a1a58f04b6bhkuang        for (i = 0; i < num_candidates[t]; i++) {
8661184aebb761cbeac9124c37189a80a1a58f04b6bhkuang          this_mv.as_mv.row = br + candidates[t][i].row;
8671184aebb761cbeac9124c37189a80a1a58f04b6bhkuang          this_mv.as_mv.col = bc + candidates[t][i].col;
8681184aebb761cbeac9124c37189a80a1a58f04b6bhkuang          CHECK_POINT
8691184aebb761cbeac9124c37189a80a1a58f04b6bhkuang          this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) +
8701184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                        this_mv.as_mv.col;
8711184aebb761cbeac9124c37189a80a1a58f04b6bhkuang          thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride,
8721184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                             bestsad);
8731184aebb761cbeac9124c37189a80a1a58f04b6bhkuang          CHECK_BETTER
8741184aebb761cbeac9124c37189a80a1a58f04b6bhkuang        }
8751184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      }
8761184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      if (best_site == -1) {
8771184aebb761cbeac9124c37189a80a1a58f04b6bhkuang        continue;
8781184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      } else {
8791184aebb761cbeac9124c37189a80a1a58f04b6bhkuang        best_init_s = t;
8801184aebb761cbeac9124c37189a80a1a58f04b6bhkuang        k = best_site;
8811184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      }
882ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    }
8831184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    if (best_init_s != -1) {
8841184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      br += candidates[best_init_s][k].row;
8851184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      bc += candidates[best_init_s][k].col;
886ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    }
887ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  }
888ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
8891184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  // If the center point is still the best, just skip this and move to
8901184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  // the refinement step.
8911184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  if (best_init_s != -1) {
8921184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    s = best_init_s;
893ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    best_site = -1;
8941184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    do {
8951184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      // No need to search all 6 points the 1st time if initial search was used
8961184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      if (!do_init_search || s != best_init_s) {
8971184aebb761cbeac9124c37189a80a1a58f04b6bhkuang        CHECK_BOUNDS((1 << s))
8981184aebb761cbeac9124c37189a80a1a58f04b6bhkuang        if (all_in) {
8991184aebb761cbeac9124c37189a80a1a58f04b6bhkuang          for (i = 0; i < num_candidates[s]; i++) {
9001184aebb761cbeac9124c37189a80a1a58f04b6bhkuang            this_mv.as_mv.row = br + candidates[s][i].row;
9011184aebb761cbeac9124c37189a80a1a58f04b6bhkuang            this_mv.as_mv.col = bc + candidates[s][i].col;
9021184aebb761cbeac9124c37189a80a1a58f04b6bhkuang            this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) +
9031184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                this_mv.as_mv.col;
9041184aebb761cbeac9124c37189a80a1a58f04b6bhkuang            thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride,
9051184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                               bestsad);
9061184aebb761cbeac9124c37189a80a1a58f04b6bhkuang            CHECK_BETTER
9071184aebb761cbeac9124c37189a80a1a58f04b6bhkuang          }
9081184aebb761cbeac9124c37189a80a1a58f04b6bhkuang        } else {
9091184aebb761cbeac9124c37189a80a1a58f04b6bhkuang          for (i = 0; i < num_candidates[s]; i++) {
9101184aebb761cbeac9124c37189a80a1a58f04b6bhkuang            this_mv.as_mv.row = br + candidates[s][i].row;
9111184aebb761cbeac9124c37189a80a1a58f04b6bhkuang            this_mv.as_mv.col = bc + candidates[s][i].col;
9121184aebb761cbeac9124c37189a80a1a58f04b6bhkuang            CHECK_POINT
9131184aebb761cbeac9124c37189a80a1a58f04b6bhkuang            this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) +
9141184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                          this_mv.as_mv.col;
9151184aebb761cbeac9124c37189a80a1a58f04b6bhkuang            thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride,
9161184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                               bestsad);
9171184aebb761cbeac9124c37189a80a1a58f04b6bhkuang            CHECK_BETTER
9181184aebb761cbeac9124c37189a80a1a58f04b6bhkuang          }
9191184aebb761cbeac9124c37189a80a1a58f04b6bhkuang        }
920ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
9211184aebb761cbeac9124c37189a80a1a58f04b6bhkuang        if (best_site == -1) {
9221184aebb761cbeac9124c37189a80a1a58f04b6bhkuang          continue;
9231184aebb761cbeac9124c37189a80a1a58f04b6bhkuang        } else {
9241184aebb761cbeac9124c37189a80a1a58f04b6bhkuang          br += candidates[s][best_site].row;
9251184aebb761cbeac9124c37189a80a1a58f04b6bhkuang          bc += candidates[s][best_site].col;
9261184aebb761cbeac9124c37189a80a1a58f04b6bhkuang          k = best_site;
9271184aebb761cbeac9124c37189a80a1a58f04b6bhkuang        }
928ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      }
929ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
9301184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      do {
9311184aebb761cbeac9124c37189a80a1a58f04b6bhkuang        int next_chkpts_indices[PATTERN_CANDIDATES_REF];
9321184aebb761cbeac9124c37189a80a1a58f04b6bhkuang        best_site = -1;
9331184aebb761cbeac9124c37189a80a1a58f04b6bhkuang        CHECK_BOUNDS((1 << s))
9341184aebb761cbeac9124c37189a80a1a58f04b6bhkuang
9351184aebb761cbeac9124c37189a80a1a58f04b6bhkuang        get_next_chkpts(next_chkpts_indices, k, num_candidates[s]);
9361184aebb761cbeac9124c37189a80a1a58f04b6bhkuang        if (all_in) {
9371184aebb761cbeac9124c37189a80a1a58f04b6bhkuang          for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
9381184aebb761cbeac9124c37189a80a1a58f04b6bhkuang            this_mv.as_mv.row = br +
9391184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                candidates[s][next_chkpts_indices[i]].row;
9401184aebb761cbeac9124c37189a80a1a58f04b6bhkuang            this_mv.as_mv.col = bc +
9411184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                candidates[s][next_chkpts_indices[i]].col;
9421184aebb761cbeac9124c37189a80a1a58f04b6bhkuang            this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) +
9431184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                this_mv.as_mv.col;
9441184aebb761cbeac9124c37189a80a1a58f04b6bhkuang            thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride,
9451184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                               bestsad);
9461184aebb761cbeac9124c37189a80a1a58f04b6bhkuang            CHECK_BETTER
9471184aebb761cbeac9124c37189a80a1a58f04b6bhkuang          }
9481184aebb761cbeac9124c37189a80a1a58f04b6bhkuang        } else {
9491184aebb761cbeac9124c37189a80a1a58f04b6bhkuang          for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
9501184aebb761cbeac9124c37189a80a1a58f04b6bhkuang            this_mv.as_mv.row = br +
9511184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                candidates[s][next_chkpts_indices[i]].row;
9521184aebb761cbeac9124c37189a80a1a58f04b6bhkuang            this_mv.as_mv.col = bc +
9531184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                candidates[s][next_chkpts_indices[i]].col;
9541184aebb761cbeac9124c37189a80a1a58f04b6bhkuang            CHECK_POINT
9551184aebb761cbeac9124c37189a80a1a58f04b6bhkuang            this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) +
9561184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                          this_mv.as_mv.col;
9571184aebb761cbeac9124c37189a80a1a58f04b6bhkuang            thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride,
9581184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                               bestsad);
9591184aebb761cbeac9124c37189a80a1a58f04b6bhkuang            CHECK_BETTER
9601184aebb761cbeac9124c37189a80a1a58f04b6bhkuang          }
9611184aebb761cbeac9124c37189a80a1a58f04b6bhkuang        }
9621184aebb761cbeac9124c37189a80a1a58f04b6bhkuang
9631184aebb761cbeac9124c37189a80a1a58f04b6bhkuang        if (best_site != -1) {
9641184aebb761cbeac9124c37189a80a1a58f04b6bhkuang          k = next_chkpts_indices[best_site];
9651184aebb761cbeac9124c37189a80a1a58f04b6bhkuang          br += candidates[s][k].row;
9661184aebb761cbeac9124c37189a80a1a58f04b6bhkuang          bc += candidates[s][k].col;
9671184aebb761cbeac9124c37189a80a1a58f04b6bhkuang        }
9681184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      } while (best_site != -1);
9691184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    } while (s--);
970ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  }
971ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
9721184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  // Check 4 1-away neighbors if do_refine is true.
9731184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  // For most well-designed schemes do_refine will not be necessary.
9741184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  if (do_refine) {
9751184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    static const MV neighbors[4] = {
9761184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      {0, -1}, { -1, 0}, {1, 0}, {0, 1},
9771184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    };
9781184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    for (j = 0; j < 16; j++) {
9791184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      best_site = -1;
9801184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      CHECK_BOUNDS(1)
9811184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      if (all_in) {
9821184aebb761cbeac9124c37189a80a1a58f04b6bhkuang        for (i = 0; i < 4; i++) {
9831184aebb761cbeac9124c37189a80a1a58f04b6bhkuang          this_mv.as_mv.row = br + neighbors[i].row;
9841184aebb761cbeac9124c37189a80a1a58f04b6bhkuang          this_mv.as_mv.col = bc + neighbors[i].col;
9851184aebb761cbeac9124c37189a80a1a58f04b6bhkuang          this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) +
9861184aebb761cbeac9124c37189a80a1a58f04b6bhkuang              this_mv.as_mv.col;
9871184aebb761cbeac9124c37189a80a1a58f04b6bhkuang          thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride,
9881184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                             bestsad);
9891184aebb761cbeac9124c37189a80a1a58f04b6bhkuang          CHECK_BETTER
9901184aebb761cbeac9124c37189a80a1a58f04b6bhkuang        }
9911184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      } else {
9921184aebb761cbeac9124c37189a80a1a58f04b6bhkuang        for (i = 0; i < 4; i++) {
9931184aebb761cbeac9124c37189a80a1a58f04b6bhkuang          this_mv.as_mv.row = br + neighbors[i].row;
9941184aebb761cbeac9124c37189a80a1a58f04b6bhkuang          this_mv.as_mv.col = bc + neighbors[i].col;
9951184aebb761cbeac9124c37189a80a1a58f04b6bhkuang          CHECK_POINT
9961184aebb761cbeac9124c37189a80a1a58f04b6bhkuang          this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) +
9971184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                        this_mv.as_mv.col;
9981184aebb761cbeac9124c37189a80a1a58f04b6bhkuang          thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride,
9991184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                             bestsad);
10001184aebb761cbeac9124c37189a80a1a58f04b6bhkuang          CHECK_BETTER
10011184aebb761cbeac9124c37189a80a1a58f04b6bhkuang        }
10021184aebb761cbeac9124c37189a80a1a58f04b6bhkuang          }
1003ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
10041184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      if (best_site == -1) {
10051184aebb761cbeac9124c37189a80a1a58f04b6bhkuang        break;
10061184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      } else {
10071184aebb761cbeac9124c37189a80a1a58f04b6bhkuang        br += neighbors[best_site].row;
10081184aebb761cbeac9124c37189a80a1a58f04b6bhkuang        bc += neighbors[best_site].col;
1009ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      }
1010ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    }
1011ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  }
1012ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1013ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  best_mv->as_mv.row = br;
1014ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  best_mv->as_mv.col = bc;
1015ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
10161184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  this_offset = base_offset + (best_mv->as_mv.row * (in_what_stride)) +
10171184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      best_mv->as_mv.col;
10181184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  this_mv.as_mv.row = best_mv->as_mv.row << 3;
10191184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  this_mv.as_mv.col = best_mv->as_mv.col << 3;
10201184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  if (bestsad == INT_MAX)
10211184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    return INT_MAX;
10221184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  return
10231184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      vfp->vf(what, what_stride, this_offset, in_what_stride,
10241184aebb761cbeac9124c37189a80a1a58f04b6bhkuang              (unsigned int *)(&bestsad)) +
10251184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      use_mvcost ? mv_err_cost(&this_mv, center_mv, x->nmvjointcost, x->mvcost,
10261184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                               x->errorperbit) : 0;
10271184aebb761cbeac9124c37189a80a1a58f04b6bhkuang}
10281184aebb761cbeac9124c37189a80a1a58f04b6bhkuang
10291184aebb761cbeac9124c37189a80a1a58f04b6bhkuang
10301184aebb761cbeac9124c37189a80a1a58f04b6bhkuangint vp9_hex_search(MACROBLOCK *x,
10311184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                   int_mv *ref_mv,
10321184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                   int search_param,
10331184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                   int sad_per_bit,
10341184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                   int do_init_search,
10351184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                   const vp9_variance_fn_ptr_t *vfp,
10361184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                   int use_mvcost,
10371184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                   int_mv *center_mv, int_mv *best_mv) {
10381184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  // First scale has 8-closest points, the rest have 6 points in hex shape
10391184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  // at increasing scales
10401184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  static const int hex_num_candidates[MAX_PATTERN_SCALES] = {
10411184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    8, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6
10421184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  };
10431184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  // Note that the largest candidate step at each scale is 2^scale
10441184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  static const MV hex_candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES] = {
10451184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    {{-1, -1}, {0, -1}, {1, -1}, {1, 0}, {1, 1}, { 0, 1}, { -1, 1}, {-1, 0}},
10461184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    {{-1, -2}, {1, -2}, {2, 0}, {1, 2}, { -1, 2}, { -2, 0}},
10471184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    {{-2, -4}, {2, -4}, {4, 0}, {2, 4}, { -2, 4}, { -4, 0}},
10481184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    {{-4, -8}, {4, -8}, {8, 0}, {4, 8}, { -4, 8}, { -8, 0}},
10491184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    {{-8, -16}, {8, -16}, {16, 0}, {8, 16}, { -8, 16}, { -16, 0}},
10501184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    {{-16, -32}, {16, -32}, {32, 0}, {16, 32}, { -16, 32}, { -32, 0}},
10511184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    {{-32, -64}, {32, -64}, {64, 0}, {32, 64}, { -32, 64}, { -64, 0}},
10521184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    {{-64, -128}, {64, -128}, {128, 0}, {64, 128}, { -64, 128}, { -128, 0}},
10531184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    {{-128, -256}, {128, -256}, {256, 0}, {128, 256}, { -128, 256}, { -256, 0}},
10541184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    {{-256, -512}, {256, -512}, {512, 0}, {256, 512}, { -256, 512}, { -512, 0}},
10551184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    {{-512, -1024}, {512, -1024}, {1024, 0}, {512, 1024}, { -512, 1024},
10561184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      { -1024, 0}},
10571184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  };
10581184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  return
10591184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      vp9_pattern_search(x, ref_mv, search_param, sad_per_bit,
10601184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                         do_init_search, 0, vfp, use_mvcost,
10611184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                         center_mv, best_mv,
10621184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                         hex_num_candidates, hex_candidates);
10631184aebb761cbeac9124c37189a80a1a58f04b6bhkuang}
10641184aebb761cbeac9124c37189a80a1a58f04b6bhkuang
10651184aebb761cbeac9124c37189a80a1a58f04b6bhkuangint vp9_bigdia_search(MACROBLOCK *x,
10661184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                      int_mv *ref_mv,
10671184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                      int search_param,
10681184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                      int sad_per_bit,
10691184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                      int do_init_search,
10701184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                      const vp9_variance_fn_ptr_t *vfp,
10711184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                      int use_mvcost,
10721184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                      int_mv *center_mv,
10731184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                      int_mv *best_mv) {
10741184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  // First scale has 4-closest points, the rest have 8 points in diamond
10751184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  // shape at increasing scales
10761184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  static const int bigdia_num_candidates[MAX_PATTERN_SCALES] = {
10771184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
10781184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  };
10791184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  // Note that the largest candidate step at each scale is 2^scale
10801184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  static const MV bigdia_candidates[MAX_PATTERN_SCALES]
10811184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                                   [MAX_PATTERN_CANDIDATES] = {
10821184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    {{0, -1}, {1, 0}, { 0, 1}, {-1, 0}},
10831184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    {{-1, -1}, {0, -2}, {1, -1}, {2, 0}, {1, 1}, {0, 2}, {-1, 1}, {-2, 0}},
10841184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    {{-2, -2}, {0, -4}, {2, -2}, {4, 0}, {2, 2}, {0, 4}, {-2, 2}, {-4, 0}},
10851184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    {{-4, -4}, {0, -8}, {4, -4}, {8, 0}, {4, 4}, {0, 8}, {-4, 4}, {-8, 0}},
10861184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    {{-8, -8}, {0, -16}, {8, -8}, {16, 0}, {8, 8}, {0, 16}, {-8, 8}, {-16, 0}},
10871184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    {{-16, -16}, {0, -32}, {16, -16}, {32, 0}, {16, 16}, {0, 32},
10881184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      {-16, 16}, {-32, 0}},
10891184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    {{-32, -32}, {0, -64}, {32, -32}, {64, 0}, {32, 32}, {0, 64},
10901184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      {-32, 32}, {-64, 0}},
10911184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    {{-64, -64}, {0, -128}, {64, -64}, {128, 0}, {64, 64}, {0, 128},
10921184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      {-64, 64}, {-128, 0}},
10931184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    {{-128, -128}, {0, -256}, {128, -128}, {256, 0}, {128, 128}, {0, 256},
10941184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      {-128, 128}, {-256, 0}},
10951184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    {{-256, -256}, {0, -512}, {256, -256}, {512, 0}, {256, 256}, {0, 512},
10961184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      {-256, 256}, {-512, 0}},
10971184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    {{-512, -512}, {0, -1024}, {512, -512}, {1024, 0}, {512, 512}, {0, 1024},
10981184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      {-512, 512}, {-1024, 0}},
10991184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  };
11001184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  return
11011184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      vp9_pattern_search(x, ref_mv, search_param, sad_per_bit,
11021184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                         do_init_search, 0, vfp, use_mvcost,
11031184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                         center_mv, best_mv,
11041184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                         bigdia_num_candidates, bigdia_candidates);
1105ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang}
11061184aebb761cbeac9124c37189a80a1a58f04b6bhkuang
11071184aebb761cbeac9124c37189a80a1a58f04b6bhkuangint vp9_square_search(MACROBLOCK *x,
11081184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                      int_mv *ref_mv,
11091184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                      int search_param,
11101184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                      int sad_per_bit,
11111184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                      int do_init_search,
11121184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                      const vp9_variance_fn_ptr_t *vfp,
11131184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                      int use_mvcost,
11141184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                      int_mv *center_mv,
11151184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                      int_mv *best_mv) {
11161184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  // All scales have 8 closest points in square shape
11171184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  static const int square_num_candidates[MAX_PATTERN_SCALES] = {
11181184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
11191184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  };
11201184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  // Note that the largest candidate step at each scale is 2^scale
11211184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  static const MV square_candidates[MAX_PATTERN_SCALES]
11221184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                                   [MAX_PATTERN_CANDIDATES] = {
11231184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    {{-1, -1}, {0, -1}, {1, -1}, {1, 0}, {1, 1}, {0, 1}, {-1, 1}, {-1, 0}},
11241184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    {{-2, -2}, {0, -2}, {2, -2}, {2, 0}, {2, 2}, {0, 2}, {-2, 2}, {-2, 0}},
11251184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    {{-4, -4}, {0, -4}, {4, -4}, {4, 0}, {4, 4}, {0, 4}, {-4, 4}, {-4, 0}},
11261184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    {{-8, -8}, {0, -8}, {8, -8}, {8, 0}, {8, 8}, {0, 8}, {-8, 8}, {-8, 0}},
11271184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    {{-16, -16}, {0, -16}, {16, -16}, {16, 0}, {16, 16}, {0, 16},
11281184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      {-16, 16}, {-16, 0}},
11291184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    {{-32, -32}, {0, -32}, {32, -32}, {32, 0}, {32, 32}, {0, 32},
11301184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      {-32, 32}, {-32, 0}},
11311184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    {{-64, -64}, {0, -64}, {64, -64}, {64, 0}, {64, 64}, {0, 64},
11321184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      {-64, 64}, {-64, 0}},
11331184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    {{-128, -128}, {0, -128}, {128, -128}, {128, 0}, {128, 128}, {0, 128},
11341184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      {-128, 128}, {-128, 0}},
11351184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    {{-256, -256}, {0, -256}, {256, -256}, {256, 0}, {256, 256}, {0, 256},
11361184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      {-256, 256}, {-256, 0}},
11371184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    {{-512, -512}, {0, -512}, {512, -512}, {512, 0}, {512, 512}, {0, 512},
11381184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      {-512, 512}, {-512, 0}},
11391184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    {{-1024, -1024}, {0, -1024}, {1024, -1024}, {1024, 0}, {1024, 1024},
11401184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      {0, 1024}, {-1024, 1024}, {-1024, 0}},
11411184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  };
11421184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  return
11431184aebb761cbeac9124c37189a80a1a58f04b6bhkuang      vp9_pattern_search(x, ref_mv, search_param, sad_per_bit,
11441184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                         do_init_search, 0, vfp, use_mvcost,
11451184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                         center_mv, best_mv,
11461184aebb761cbeac9124c37189a80a1a58f04b6bhkuang                         square_num_candidates, square_candidates);
11471184aebb761cbeac9124c37189a80a1a58f04b6bhkuang};
11481184aebb761cbeac9124c37189a80a1a58f04b6bhkuang
1149ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang#undef CHECK_BOUNDS
1150ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang#undef CHECK_POINT
1151ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang#undef CHECK_BETTER
1152ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1153ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuangint vp9_diamond_search_sad_c(MACROBLOCK *x,
1154ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                             int_mv *ref_mv, int_mv *best_mv,
1155ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                             int search_param, int sad_per_bit, int *num00,
1156ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                             vp9_variance_fn_ptr_t *fn_ptr, int *mvjcost,
1157ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                             int *mvcost[2], int_mv *center_mv) {
1158ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int i, j, step;
1159ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1160ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  const MACROBLOCKD* const xd = &x->e_mbd;
1161ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  uint8_t *what = x->plane[0].src.buf;
1162ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int what_stride = x->plane[0].src.stride;
1163ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  uint8_t *in_what;
1164ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int in_what_stride = xd->plane[0].pre[0].stride;
1165ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  uint8_t *best_address;
1166ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1167ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int tot_steps;
1168ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int_mv this_mv;
1169ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1170ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int bestsad = INT_MAX;
1171ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int best_site = 0;
1172ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int last_site = 0;
1173ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1174ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int ref_row, ref_col;
1175ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int this_row_offset, this_col_offset;
1176ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  search_site *ss;
1177ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1178ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  uint8_t *check_here;
1179ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int thissad;
1180ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int_mv fcenter_mv;
1181ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1182ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int *mvjsadcost = x->nmvjointsadcost;
1183ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
1184ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1185ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
1186ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
1187ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1188f3bed9137f66ef693bd406e43b17e9a1114f1e14hkuang  clamp_mv(&ref_mv->as_mv,
1189f3bed9137f66ef693bd406e43b17e9a1114f1e14hkuang           x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
1190ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  ref_row = ref_mv->as_mv.row;
1191ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  ref_col = ref_mv->as_mv.col;
1192ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  *num00 = 0;
1193ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  best_mv->as_mv.row = ref_row;
1194ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  best_mv->as_mv.col = ref_col;
1195ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1196ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  // Work out the start point for the search
1197ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  in_what = (uint8_t *)(xd->plane[0].pre[0].buf +
1198ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                        (ref_row * (xd->plane[0].pre[0].stride)) + ref_col);
1199ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  best_address = in_what;
1200ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1201ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  // Check the starting position
1202ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  bestsad = fn_ptr->sdf(what, what_stride, in_what,
1203ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                        in_what_stride, 0x7fffffff)
1204ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            + mvsad_err_cost(best_mv, &fcenter_mv, mvjsadcost, mvsadcost,
1205ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                             sad_per_bit);
1206ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1207ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  // search_param determines the length of the initial step and hence the number of iterations
1208ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  // 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc.
1209ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  ss = &x->ss[search_param * x->searches_per_step];
1210ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  tot_steps = (x->ss_count / x->searches_per_step) - search_param;
1211ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1212ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  i = 1;
1213ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1214ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  for (step = 0; step < tot_steps; step++) {
1215ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    for (j = 0; j < x->searches_per_step; j++) {
1216ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      // Trap illegal vectors
1217ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      this_row_offset = best_mv->as_mv.row + ss[i].mv.row;
1218ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      this_col_offset = best_mv->as_mv.col + ss[i].mv.col;
1219ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
122091037db265ecdd914a26e056cf69207b4f50924ehkuang      if ((this_col_offset > x->mv_col_min) &&
122191037db265ecdd914a26e056cf69207b4f50924ehkuang          (this_col_offset < x->mv_col_max) &&
122291037db265ecdd914a26e056cf69207b4f50924ehkuang          (this_row_offset > x->mv_row_min) &&
122391037db265ecdd914a26e056cf69207b4f50924ehkuang          (this_row_offset < x->mv_row_max)) {
1224ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        check_here = ss[i].offset + best_address;
122591037db265ecdd914a26e056cf69207b4f50924ehkuang        thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride,
122691037db265ecdd914a26e056cf69207b4f50924ehkuang                              bestsad);
1227ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1228ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        if (thissad < bestsad) {
1229ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang          this_mv.as_mv.row = this_row_offset;
1230ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang          this_mv.as_mv.col = this_col_offset;
1231ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang          thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
1232ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                                    mvjsadcost, mvsadcost, sad_per_bit);
1233ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1234ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang          if (thissad < bestsad) {
1235ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            bestsad = thissad;
1236ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            best_site = i;
1237ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang          }
1238ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        }
1239ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      }
1240ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1241ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      i++;
1242ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    }
1243ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1244ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    if (best_site != last_site) {
1245ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      best_mv->as_mv.row += ss[best_site].mv.row;
1246ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      best_mv->as_mv.col += ss[best_site].mv.col;
1247ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      best_address += ss[best_site].offset;
1248ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      last_site = best_site;
124991037db265ecdd914a26e056cf69207b4f50924ehkuang#if defined(NEW_DIAMOND_SEARCH)
125091037db265ecdd914a26e056cf69207b4f50924ehkuang      while (1) {
125191037db265ecdd914a26e056cf69207b4f50924ehkuang        this_row_offset = best_mv->as_mv.row + ss[best_site].mv.row;
125291037db265ecdd914a26e056cf69207b4f50924ehkuang        this_col_offset = best_mv->as_mv.col + ss[best_site].mv.col;
125391037db265ecdd914a26e056cf69207b4f50924ehkuang        if ((this_col_offset > x->mv_col_min) &&
125491037db265ecdd914a26e056cf69207b4f50924ehkuang            (this_col_offset < x->mv_col_max) &&
125591037db265ecdd914a26e056cf69207b4f50924ehkuang            (this_row_offset > x->mv_row_min) &&
125691037db265ecdd914a26e056cf69207b4f50924ehkuang            (this_row_offset < x->mv_row_max)) {
125791037db265ecdd914a26e056cf69207b4f50924ehkuang          check_here = ss[best_site].offset + best_address;
125891037db265ecdd914a26e056cf69207b4f50924ehkuang          thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride,
125991037db265ecdd914a26e056cf69207b4f50924ehkuang                                bestsad);
126091037db265ecdd914a26e056cf69207b4f50924ehkuang          if (thissad < bestsad) {
126191037db265ecdd914a26e056cf69207b4f50924ehkuang            this_mv.as_mv.row = this_row_offset;
126291037db265ecdd914a26e056cf69207b4f50924ehkuang            this_mv.as_mv.col = this_col_offset;
126391037db265ecdd914a26e056cf69207b4f50924ehkuang            thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
126491037db265ecdd914a26e056cf69207b4f50924ehkuang                                      mvjsadcost, mvsadcost, sad_per_bit);
126591037db265ecdd914a26e056cf69207b4f50924ehkuang            if (thissad < bestsad) {
126691037db265ecdd914a26e056cf69207b4f50924ehkuang              bestsad = thissad;
126791037db265ecdd914a26e056cf69207b4f50924ehkuang              best_mv->as_mv.row += ss[best_site].mv.row;
126891037db265ecdd914a26e056cf69207b4f50924ehkuang              best_mv->as_mv.col += ss[best_site].mv.col;
126991037db265ecdd914a26e056cf69207b4f50924ehkuang              best_address += ss[best_site].offset;
127091037db265ecdd914a26e056cf69207b4f50924ehkuang              continue;
127191037db265ecdd914a26e056cf69207b4f50924ehkuang            }
127291037db265ecdd914a26e056cf69207b4f50924ehkuang          }
127391037db265ecdd914a26e056cf69207b4f50924ehkuang        }
127491037db265ecdd914a26e056cf69207b4f50924ehkuang        break;
127591037db265ecdd914a26e056cf69207b4f50924ehkuang      };
127691037db265ecdd914a26e056cf69207b4f50924ehkuang#endif
1277ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    } else if (best_address == in_what)
1278ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      (*num00)++;
1279ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  }
1280ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1281ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  this_mv.as_mv.row = best_mv->as_mv.row << 3;
1282ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  this_mv.as_mv.col = best_mv->as_mv.col << 3;
1283ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1284ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  if (bestsad == INT_MAX)
1285ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    return INT_MAX;
1286ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1287f3bed9137f66ef693bd406e43b17e9a1114f1e14hkuang  return fn_ptr->vf(what, what_stride, best_address, in_what_stride,
1288f3bed9137f66ef693bd406e43b17e9a1114f1e14hkuang         (unsigned int *)(&thissad)) + mv_err_cost(&this_mv, center_mv, mvjcost,
1289f3bed9137f66ef693bd406e43b17e9a1114f1e14hkuang                                                   mvcost, x->errorperbit);
1290ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang}
1291ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1292ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuangint vp9_diamond_search_sadx4(MACROBLOCK *x,
1293ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                             int_mv *ref_mv, int_mv *best_mv, int search_param,
1294ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                             int sad_per_bit, int *num00,
1295ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                             vp9_variance_fn_ptr_t *fn_ptr,
1296ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                             int *mvjcost, int *mvcost[2], int_mv *center_mv) {
1297ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int i, j, step;
1298ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1299ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  const MACROBLOCKD* const xd = &x->e_mbd;
1300ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  uint8_t *what = x->plane[0].src.buf;
1301ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int what_stride = x->plane[0].src.stride;
1302ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  uint8_t *in_what;
1303ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int in_what_stride = xd->plane[0].pre[0].stride;
1304ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  uint8_t *best_address;
1305ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1306ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int tot_steps;
1307ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int_mv this_mv;
1308ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1309ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  unsigned int bestsad = INT_MAX;
1310ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int best_site = 0;
1311ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int last_site = 0;
1312ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1313ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int ref_row;
1314ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int ref_col;
1315ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int this_row_offset;
1316ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int this_col_offset;
1317ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  search_site *ss;
1318ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1319ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  uint8_t *check_here;
1320ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  unsigned int thissad;
1321ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int_mv fcenter_mv;
1322ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1323ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int *mvjsadcost = x->nmvjointsadcost;
1324ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
1325ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1326ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
1327ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
1328ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1329f3bed9137f66ef693bd406e43b17e9a1114f1e14hkuang  clamp_mv(&ref_mv->as_mv,
1330f3bed9137f66ef693bd406e43b17e9a1114f1e14hkuang           x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
1331ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  ref_row = ref_mv->as_mv.row;
1332ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  ref_col = ref_mv->as_mv.col;
1333ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  *num00 = 0;
1334ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  best_mv->as_mv.row = ref_row;
1335ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  best_mv->as_mv.col = ref_col;
1336ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1337ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  // Work out the start point for the search
1338ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  in_what = (uint8_t *)(xd->plane[0].pre[0].buf +
1339ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                        (ref_row * (xd->plane[0].pre[0].stride)) + ref_col);
1340ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  best_address = in_what;
1341ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1342ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  // Check the starting position
1343ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  bestsad = fn_ptr->sdf(what, what_stride,
1344ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                        in_what, in_what_stride, 0x7fffffff)
1345ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            + mvsad_err_cost(best_mv, &fcenter_mv, mvjsadcost, mvsadcost,
1346ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                             sad_per_bit);
1347ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1348ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  // search_param determines the length of the initial step and hence the number of iterations
1349ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  // 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc.
1350ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  ss = &x->ss[search_param * x->searches_per_step];
1351ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  tot_steps = (x->ss_count / x->searches_per_step) - search_param;
1352ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1353ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  i = 1;
1354ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1355ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  for (step = 0; step < tot_steps; step++) {
1356ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    int all_in = 1, t;
1357ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1358ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    // To know if all neighbor points are within the bounds, 4 bounds checking are enough instead of
1359ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    // checking 4 bounds for each points.
1360ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    all_in &= ((best_mv->as_mv.row + ss[i].mv.row) > x->mv_row_min);
1361ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    all_in &= ((best_mv->as_mv.row + ss[i + 1].mv.row) < x->mv_row_max);
1362ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    all_in &= ((best_mv->as_mv.col + ss[i + 2].mv.col) > x->mv_col_min);
1363ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    all_in &= ((best_mv->as_mv.col + ss[i + 3].mv.col) < x->mv_col_max);
1364ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1365ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    if (all_in) {
1366ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      unsigned int sad_array[4];
1367ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1368ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      for (j = 0; j < x->searches_per_step; j += 4) {
1369ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        unsigned char const *block_offset[4];
1370ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1371ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        for (t = 0; t < 4; t++)
1372ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang          block_offset[t] = ss[i + t].offset + best_address;
1373ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1374ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride,
1375ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                       sad_array);
1376ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1377ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        for (t = 0; t < 4; t++, i++) {
1378ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang          if (sad_array[t] < bestsad) {
1379ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            this_mv.as_mv.row = best_mv->as_mv.row + ss[i].mv.row;
1380ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            this_mv.as_mv.col = best_mv->as_mv.col + ss[i].mv.col;
1381ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            sad_array[t] += mvsad_err_cost(&this_mv, &fcenter_mv,
1382ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                                           mvjsadcost, mvsadcost, sad_per_bit);
1383ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1384ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            if (sad_array[t] < bestsad) {
1385ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang              bestsad = sad_array[t];
1386ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang              best_site = i;
1387ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            }
1388ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang          }
1389ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        }
1390ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      }
1391ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    } else {
1392ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      for (j = 0; j < x->searches_per_step; j++) {
1393ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        // Trap illegal vectors
1394ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        this_row_offset = best_mv->as_mv.row + ss[i].mv.row;
1395ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        this_col_offset = best_mv->as_mv.col + ss[i].mv.col;
1396ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1397ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) &&
1398ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max)) {
1399ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang          check_here = ss[i].offset + best_address;
1400ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang          thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad);
1401ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1402ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang          if (thissad < bestsad) {
1403ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            this_mv.as_mv.row = this_row_offset;
1404ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            this_mv.as_mv.col = this_col_offset;
1405ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
1406ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                                      mvjsadcost, mvsadcost, sad_per_bit);
1407ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1408ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            if (thissad < bestsad) {
1409ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang              bestsad = thissad;
1410ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang              best_site = i;
1411ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            }
1412ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang          }
1413ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        }
1414ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        i++;
1415ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      }
1416ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    }
1417ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    if (best_site != last_site) {
1418ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      best_mv->as_mv.row += ss[best_site].mv.row;
1419ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      best_mv->as_mv.col += ss[best_site].mv.col;
1420ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      best_address += ss[best_site].offset;
1421ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      last_site = best_site;
142291037db265ecdd914a26e056cf69207b4f50924ehkuang#if defined(NEW_DIAMOND_SEARCH)
142391037db265ecdd914a26e056cf69207b4f50924ehkuang      while (1) {
142491037db265ecdd914a26e056cf69207b4f50924ehkuang        this_row_offset = best_mv->as_mv.row + ss[best_site].mv.row;
142591037db265ecdd914a26e056cf69207b4f50924ehkuang        this_col_offset = best_mv->as_mv.col + ss[best_site].mv.col;
142691037db265ecdd914a26e056cf69207b4f50924ehkuang        if ((this_col_offset > x->mv_col_min) &&
142791037db265ecdd914a26e056cf69207b4f50924ehkuang            (this_col_offset < x->mv_col_max) &&
142891037db265ecdd914a26e056cf69207b4f50924ehkuang            (this_row_offset > x->mv_row_min) &&
142991037db265ecdd914a26e056cf69207b4f50924ehkuang            (this_row_offset < x->mv_row_max)) {
143091037db265ecdd914a26e056cf69207b4f50924ehkuang          check_here = ss[best_site].offset + best_address;
143191037db265ecdd914a26e056cf69207b4f50924ehkuang          thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride,
143291037db265ecdd914a26e056cf69207b4f50924ehkuang                                bestsad);
143391037db265ecdd914a26e056cf69207b4f50924ehkuang          if (thissad < bestsad) {
143491037db265ecdd914a26e056cf69207b4f50924ehkuang            this_mv.as_mv.row = this_row_offset;
143591037db265ecdd914a26e056cf69207b4f50924ehkuang            this_mv.as_mv.col = this_col_offset;
143691037db265ecdd914a26e056cf69207b4f50924ehkuang            thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
143791037db265ecdd914a26e056cf69207b4f50924ehkuang                                      mvjsadcost, mvsadcost, sad_per_bit);
143891037db265ecdd914a26e056cf69207b4f50924ehkuang            if (thissad < bestsad) {
143991037db265ecdd914a26e056cf69207b4f50924ehkuang              bestsad = thissad;
144091037db265ecdd914a26e056cf69207b4f50924ehkuang              best_mv->as_mv.row += ss[best_site].mv.row;
144191037db265ecdd914a26e056cf69207b4f50924ehkuang              best_mv->as_mv.col += ss[best_site].mv.col;
144291037db265ecdd914a26e056cf69207b4f50924ehkuang              best_address += ss[best_site].offset;
144391037db265ecdd914a26e056cf69207b4f50924ehkuang              continue;
144491037db265ecdd914a26e056cf69207b4f50924ehkuang            }
144591037db265ecdd914a26e056cf69207b4f50924ehkuang          }
144691037db265ecdd914a26e056cf69207b4f50924ehkuang        }
144791037db265ecdd914a26e056cf69207b4f50924ehkuang        break;
144891037db265ecdd914a26e056cf69207b4f50924ehkuang      };
144991037db265ecdd914a26e056cf69207b4f50924ehkuang#endif
1450ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    } else if (best_address == in_what)
1451ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      (*num00)++;
1452ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  }
1453ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1454ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  this_mv.as_mv.row = best_mv->as_mv.row << 3;
1455ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  this_mv.as_mv.col = best_mv->as_mv.col << 3;
1456ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1457ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  if (bestsad == INT_MAX)
1458ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    return INT_MAX;
1459ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1460f3bed9137f66ef693bd406e43b17e9a1114f1e14hkuang  return fn_ptr->vf(what, what_stride, best_address, in_what_stride,
1461f3bed9137f66ef693bd406e43b17e9a1114f1e14hkuang                    (unsigned int *)(&thissad)) + mv_err_cost(&this_mv,
1462f3bed9137f66ef693bd406e43b17e9a1114f1e14hkuang                            center_mv, mvjcost, mvcost, x->errorperbit);
1463ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang}
1464ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1465ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang/* do_refine: If last step (1-away) of n-step search doesn't pick the center
1466ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang              point as the best match, we will do a final 1-away diamond
1467ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang              refining search  */
146891037db265ecdd914a26e056cf69207b4f50924ehkuang
1469ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuangint vp9_full_pixel_diamond(VP9_COMP *cpi, MACROBLOCK *x,
1470ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                           int_mv *mvp_full, int step_param,
1471ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                           int sadpb, int further_steps,
1472ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                           int do_refine, vp9_variance_fn_ptr_t *fn_ptr,
1473ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                           int_mv *ref_mv, int_mv *dst_mv) {
1474ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int_mv temp_mv;
1475ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int thissme, n, num00;
1476ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int bestsme = cpi->diamond_search_sad(x, mvp_full, &temp_mv,
1477ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                                        step_param, sadpb, &num00,
1478ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                                        fn_ptr, x->nmvjointcost,
1479ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                                        x->mvcost, ref_mv);
1480ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  dst_mv->as_int = temp_mv.as_int;
1481ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1482ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  n = num00;
1483ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  num00 = 0;
1484ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1485ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  /* If there won't be more n-step search, check to see if refining search is needed. */
1486ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  if (n > further_steps)
1487ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    do_refine = 0;
1488ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1489ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  while (n < further_steps) {
1490ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    n++;
1491ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1492ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    if (num00)
1493ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      num00--;
1494ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    else {
1495ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      thissme = cpi->diamond_search_sad(x, mvp_full, &temp_mv,
1496ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                                        step_param + n, sadpb, &num00,
1497ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                                        fn_ptr, x->nmvjointcost, x->mvcost,
1498ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                                        ref_mv);
1499ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1500ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      /* check to see if refining search is needed. */
1501ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      if (num00 > (further_steps - n))
1502ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        do_refine = 0;
1503ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1504ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      if (thissme < bestsme) {
1505ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        bestsme = thissme;
1506ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        dst_mv->as_int = temp_mv.as_int;
1507ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      }
1508ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    }
1509ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  }
1510ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1511ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  /* final 1-away diamond refining search */
1512ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  if (do_refine == 1) {
1513ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    int search_range = 8;
1514ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    int_mv best_mv;
1515ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    best_mv.as_int = dst_mv->as_int;
1516ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    thissme = cpi->refining_search_sad(x, &best_mv, sadpb, search_range,
1517ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                                       fn_ptr, x->nmvjointcost, x->mvcost,
1518ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                                       ref_mv);
1519ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1520ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    if (thissme < bestsme) {
1521ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      bestsme = thissme;
1522ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      dst_mv->as_int = best_mv.as_int;
1523ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    }
1524ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  }
1525ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  return bestsme;
1526ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang}
1527ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1528ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuangint vp9_full_search_sad_c(MACROBLOCK *x, int_mv *ref_mv,
1529ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                          int sad_per_bit, int distance,
1530ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                          vp9_variance_fn_ptr_t *fn_ptr, int *mvjcost,
1531ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                          int *mvcost[2],
1532ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                          int_mv *center_mv, int n) {
1533ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  const MACROBLOCKD* const xd = &x->e_mbd;
1534ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  uint8_t *what = x->plane[0].src.buf;
1535ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int what_stride = x->plane[0].src.stride;
1536ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  uint8_t *in_what;
1537ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int in_what_stride = xd->plane[0].pre[0].stride;
1538ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int mv_stride = xd->plane[0].pre[0].stride;
1539ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  uint8_t *bestaddress;
15401184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  int_mv *best_mv = &x->e_mbd.mi_8x8[0]->bmi[n].as_mv[0];
1541ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int_mv this_mv;
1542ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int bestsad = INT_MAX;
1543ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int r, c;
1544ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1545ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  uint8_t *check_here;
1546ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int thissad;
1547ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1548ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int ref_row = ref_mv->as_mv.row;
1549ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int ref_col = ref_mv->as_mv.col;
1550ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1551ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int row_min = ref_row - distance;
1552ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int row_max = ref_row + distance;
1553ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int col_min = ref_col - distance;
1554ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int col_max = ref_col + distance;
1555ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int_mv fcenter_mv;
1556ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1557ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int *mvjsadcost = x->nmvjointsadcost;
1558ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
1559ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1560ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
1561ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
1562ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1563ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  // Work out the mid point for the search
1564ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  in_what = xd->plane[0].pre[0].buf;
1565ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  bestaddress = in_what + (ref_row * xd->plane[0].pre[0].stride) + ref_col;
1566ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1567ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  best_mv->as_mv.row = ref_row;
1568ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  best_mv->as_mv.col = ref_col;
1569ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1570ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  // Baseline value at the centre
1571ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  bestsad = fn_ptr->sdf(what, what_stride, bestaddress,
1572ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                        in_what_stride, 0x7fffffff)
1573ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            + mvsad_err_cost(best_mv, &fcenter_mv, mvjsadcost, mvsadcost,
1574ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                             sad_per_bit);
1575ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
15761184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  // Apply further limits to prevent us looking using vectors that stretch
15771184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  // beyond the UMV border
15781184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  col_min = MAX(col_min, x->mv_col_min);
15791184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  col_max = MIN(col_max, x->mv_col_max);
15801184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  row_min = MAX(row_min, x->mv_row_min);
15811184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  row_max = MIN(row_max, x->mv_row_max);
1582ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1583ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  for (r = row_min; r < row_max; r++) {
1584ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    this_mv.as_mv.row = r;
1585ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    check_here = r * mv_stride + in_what + col_min;
1586ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1587ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    for (c = col_min; c < col_max; c++) {
1588ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad);
1589ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1590ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      this_mv.as_mv.col = c;
1591ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      thissad  += mvsad_err_cost(&this_mv, &fcenter_mv,
1592ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                                 mvjsadcost, mvsadcost, sad_per_bit);
1593ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1594ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      if (thissad < bestsad) {
1595ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        bestsad = thissad;
1596ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        best_mv->as_mv.row = r;
1597ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        best_mv->as_mv.col = c;
1598ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        bestaddress = check_here;
1599ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      }
1600ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1601ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      check_here++;
1602ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    }
1603ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  }
1604ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1605ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  this_mv.as_mv.row = best_mv->as_mv.row << 3;
1606ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  this_mv.as_mv.col = best_mv->as_mv.col << 3;
1607ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1608ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  if (bestsad < INT_MAX)
1609ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    return
1610ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        fn_ptr->vf(what, what_stride, bestaddress, in_what_stride,
1611ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                   (unsigned int *)(&thissad)) +
1612f3bed9137f66ef693bd406e43b17e9a1114f1e14hkuang        mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit);
1613ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  else
1614ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    return INT_MAX;
1615ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang}
1616ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1617ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuangint vp9_full_search_sadx3(MACROBLOCK *x, int_mv *ref_mv,
1618ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                          int sad_per_bit, int distance,
1619ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                          vp9_variance_fn_ptr_t *fn_ptr, int *mvjcost,
1620ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                          int *mvcost[2], int_mv *center_mv, int n) {
1621ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  const MACROBLOCKD* const xd = &x->e_mbd;
1622ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  uint8_t *what = x->plane[0].src.buf;
1623ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int what_stride = x->plane[0].src.stride;
1624ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  uint8_t *in_what;
1625ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int in_what_stride = xd->plane[0].pre[0].stride;
1626ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int mv_stride = xd->plane[0].pre[0].stride;
1627ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  uint8_t *bestaddress;
16281184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  int_mv *best_mv = &x->e_mbd.mi_8x8[0]->bmi[n].as_mv[0];
1629ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int_mv this_mv;
1630ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  unsigned int bestsad = INT_MAX;
1631ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int r, c;
1632ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1633ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  uint8_t *check_here;
1634ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  unsigned int thissad;
1635ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1636ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int ref_row = ref_mv->as_mv.row;
1637ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int ref_col = ref_mv->as_mv.col;
1638ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1639ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int row_min = ref_row - distance;
1640ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int row_max = ref_row + distance;
1641ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int col_min = ref_col - distance;
1642ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int col_max = ref_col + distance;
1643ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1644ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  unsigned int sad_array[3];
1645ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int_mv fcenter_mv;
1646ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1647ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int *mvjsadcost = x->nmvjointsadcost;
1648ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
1649ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1650ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
1651ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
1652ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1653ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  // Work out the mid point for the search
1654ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  in_what = xd->plane[0].pre[0].buf;
1655ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  bestaddress = in_what + (ref_row * xd->plane[0].pre[0].stride) + ref_col;
1656ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1657ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  best_mv->as_mv.row = ref_row;
1658ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  best_mv->as_mv.col = ref_col;
1659ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1660ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  // Baseline value at the centre
1661ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  bestsad = fn_ptr->sdf(what, what_stride,
1662ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                        bestaddress, in_what_stride, 0x7fffffff)
1663ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            + mvsad_err_cost(best_mv, &fcenter_mv, mvjsadcost, mvsadcost,
1664ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                             sad_per_bit);
1665ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
16661184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  // Apply further limits to prevent us looking using vectors that stretch
16671184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  // beyond the UMV border
16681184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  col_min = MAX(col_min, x->mv_col_min);
16691184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  col_max = MIN(col_max, x->mv_col_max);
16701184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  row_min = MAX(row_min, x->mv_row_min);
16711184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  row_max = MIN(row_max, x->mv_row_max);
1672ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1673ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  for (r = row_min; r < row_max; r++) {
1674ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    this_mv.as_mv.row = r;
1675ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    check_here = r * mv_stride + in_what + col_min;
1676ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    c = col_min;
1677ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1678ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    while ((c + 2) < col_max) {
1679ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      int i;
1680ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1681ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array);
1682ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1683ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      for (i = 0; i < 3; i++) {
1684ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        thissad = sad_array[i];
1685ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1686ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        if (thissad < bestsad) {
1687ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang          this_mv.as_mv.col = c;
1688ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang          thissad  += mvsad_err_cost(&this_mv, &fcenter_mv,
1689ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                                     mvjsadcost, mvsadcost, sad_per_bit);
1690ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1691ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang          if (thissad < bestsad) {
1692ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            bestsad = thissad;
1693ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            best_mv->as_mv.row = r;
1694ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            best_mv->as_mv.col = c;
1695ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            bestaddress = check_here;
1696ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang          }
1697ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        }
1698ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1699ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        check_here++;
1700ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        c++;
1701ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      }
1702ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    }
1703ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1704ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    while (c < col_max) {
1705ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad);
1706ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1707ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      if (thissad < bestsad) {
1708ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        this_mv.as_mv.col = c;
1709ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        thissad  += mvsad_err_cost(&this_mv, &fcenter_mv,
1710ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                                   mvjsadcost, mvsadcost, sad_per_bit);
1711ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1712ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        if (thissad < bestsad) {
1713ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang          bestsad = thissad;
1714ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang          best_mv->as_mv.row = r;
1715ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang          best_mv->as_mv.col = c;
1716ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang          bestaddress = check_here;
1717ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        }
1718ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      }
1719ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1720ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      check_here++;
1721ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      c++;
1722ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    }
1723ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1724ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  }
1725ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1726ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  this_mv.as_mv.row = best_mv->as_mv.row << 3;
1727ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  this_mv.as_mv.col = best_mv->as_mv.col << 3;
1728ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1729ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  if (bestsad < INT_MAX)
1730ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    return
1731ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        fn_ptr->vf(what, what_stride, bestaddress, in_what_stride,
1732ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                   (unsigned int *)(&thissad)) +
1733f3bed9137f66ef693bd406e43b17e9a1114f1e14hkuang        mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit);
1734ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  else
1735ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    return INT_MAX;
1736ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang}
1737ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1738ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuangint vp9_full_search_sadx8(MACROBLOCK *x, int_mv *ref_mv,
1739ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                          int sad_per_bit, int distance,
1740ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                          vp9_variance_fn_ptr_t *fn_ptr,
1741ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                          int *mvjcost, int *mvcost[2],
1742ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                          int_mv *center_mv, int n) {
1743ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  const MACROBLOCKD* const xd = &x->e_mbd;
1744ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  uint8_t *what = x->plane[0].src.buf;
1745ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int what_stride = x->plane[0].src.stride;
1746ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  uint8_t *in_what;
1747ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int in_what_stride = xd->plane[0].pre[0].stride;
1748ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int mv_stride = xd->plane[0].pre[0].stride;
1749ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  uint8_t *bestaddress;
17501184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  int_mv *best_mv = &x->e_mbd.mi_8x8[0]->bmi[n].as_mv[0];
1751ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int_mv this_mv;
1752ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  unsigned int bestsad = INT_MAX;
1753ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int r, c;
1754ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1755ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  uint8_t *check_here;
1756ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  unsigned int thissad;
1757ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1758ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int ref_row = ref_mv->as_mv.row;
1759ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int ref_col = ref_mv->as_mv.col;
1760ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1761ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int row_min = ref_row - distance;
1762ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int row_max = ref_row + distance;
1763ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int col_min = ref_col - distance;
1764ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int col_max = ref_col + distance;
1765ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1766ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  DECLARE_ALIGNED_ARRAY(16, uint32_t, sad_array8, 8);
1767ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  unsigned int sad_array[3];
1768ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int_mv fcenter_mv;
1769ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1770ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int *mvjsadcost = x->nmvjointsadcost;
1771ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
1772ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1773ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
1774ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
1775ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1776ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  // Work out the mid point for the search
1777ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  in_what = xd->plane[0].pre[0].buf;
1778ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  bestaddress = in_what + (ref_row * xd->plane[0].pre[0].stride) + ref_col;
1779ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1780ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  best_mv->as_mv.row = ref_row;
1781ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  best_mv->as_mv.col = ref_col;
1782ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1783ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  // Baseline value at the centre
1784ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  bestsad = fn_ptr->sdf(what, what_stride,
1785ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                        bestaddress, in_what_stride, 0x7fffffff)
1786ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            + mvsad_err_cost(best_mv, &fcenter_mv, mvjsadcost, mvsadcost,
1787ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                             sad_per_bit);
1788ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
17891184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  // Apply further limits to prevent us looking using vectors that stretch
17901184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  // beyond the UMV border
17911184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  col_min = MAX(col_min, x->mv_col_min);
17921184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  col_max = MIN(col_max, x->mv_col_max);
17931184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  row_min = MAX(row_min, x->mv_row_min);
17941184aebb761cbeac9124c37189a80a1a58f04b6bhkuang  row_max = MIN(row_max, x->mv_row_max);
1795ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1796ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  for (r = row_min; r < row_max; r++) {
1797ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    this_mv.as_mv.row = r;
1798ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    check_here = r * mv_stride + in_what + col_min;
1799ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    c = col_min;
1800ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1801ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    while ((c + 7) < col_max) {
1802ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      int i;
1803ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1804ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      fn_ptr->sdx8f(what, what_stride, check_here, in_what_stride, sad_array8);
1805ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1806ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      for (i = 0; i < 8; i++) {
1807ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        thissad = (unsigned int)sad_array8[i];
1808ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1809ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        if (thissad < bestsad) {
1810ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang          this_mv.as_mv.col = c;
1811ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang          thissad  += mvsad_err_cost(&this_mv, &fcenter_mv,
1812ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                                     mvjsadcost, mvsadcost, sad_per_bit);
1813ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1814ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang          if (thissad < bestsad) {
1815ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            bestsad = thissad;
1816ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            best_mv->as_mv.row = r;
1817ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            best_mv->as_mv.col = c;
1818ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            bestaddress = check_here;
1819ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang          }
1820ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        }
1821ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1822ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        check_here++;
1823ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        c++;
1824ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      }
1825ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    }
1826ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
18271184aebb761cbeac9124c37189a80a1a58f04b6bhkuang    while ((c + 2) < col_max && fn_ptr->sdx3f != NULL) {
1828ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      int i;
1829ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1830ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array);
1831ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1832ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      for (i = 0; i < 3; i++) {
1833ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        thissad = sad_array[i];
1834ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1835ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        if (thissad < bestsad) {
1836ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang          this_mv.as_mv.col = c;
1837ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang          thissad  += mvsad_err_cost(&this_mv, &fcenter_mv,
1838ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                                     mvjsadcost, mvsadcost, sad_per_bit);
1839ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1840ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang          if (thissad < bestsad) {
1841ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            bestsad = thissad;
1842ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            best_mv->as_mv.row = r;
1843ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            best_mv->as_mv.col = c;
1844ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            bestaddress = check_here;
1845ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang          }
1846ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        }
1847ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1848ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        check_here++;
1849ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        c++;
1850ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      }
1851ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    }
1852ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1853ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    while (c < col_max) {
1854ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad);
1855ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1856ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      if (thissad < bestsad) {
1857ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        this_mv.as_mv.col = c;
1858ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        thissad  += mvsad_err_cost(&this_mv, &fcenter_mv,
1859ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                                   mvjsadcost, mvsadcost, sad_per_bit);
1860ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1861ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        if (thissad < bestsad) {
1862ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang          bestsad = thissad;
1863ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang          best_mv->as_mv.row = r;
1864ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang          best_mv->as_mv.col = c;
1865ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang          bestaddress = check_here;
1866ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        }
1867ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      }
1868ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1869ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      check_here++;
1870ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      c++;
1871ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    }
1872ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  }
1873ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1874ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  this_mv.as_mv.row = best_mv->as_mv.row << 3;
1875ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  this_mv.as_mv.col = best_mv->as_mv.col << 3;
1876ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1877ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  if (bestsad < INT_MAX)
1878ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    return
1879ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        fn_ptr->vf(what, what_stride, bestaddress, in_what_stride,
1880ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                   (unsigned int *)(&thissad)) +
1881f3bed9137f66ef693bd406e43b17e9a1114f1e14hkuang        mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit);
1882ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  else
1883ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    return INT_MAX;
1884ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang}
1885ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuangint vp9_refining_search_sad_c(MACROBLOCK *x,
1886ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                              int_mv *ref_mv, int error_per_bit,
1887ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                              int search_range, vp9_variance_fn_ptr_t *fn_ptr,
1888ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                              int *mvjcost, int *mvcost[2], int_mv *center_mv) {
1889ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  const MACROBLOCKD* const xd = &x->e_mbd;
1890ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  MV neighbors[4] = {{ -1, 0}, {0, -1}, {0, 1}, {1, 0}};
1891ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int i, j;
1892ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int this_row_offset, this_col_offset;
1893ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1894ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int what_stride = x->plane[0].src.stride;
1895ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int in_what_stride = xd->plane[0].pre[0].stride;
1896ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  uint8_t *what = x->plane[0].src.buf;
1897ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  uint8_t *best_address = xd->plane[0].pre[0].buf +
1898ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                          (ref_mv->as_mv.row * xd->plane[0].pre[0].stride) +
1899ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                          ref_mv->as_mv.col;
1900ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  uint8_t *check_here;
1901ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  unsigned int thissad;
1902ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int_mv this_mv;
1903ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  unsigned int bestsad = INT_MAX;
1904ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int_mv fcenter_mv;
1905ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1906ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int *mvjsadcost = x->nmvjointsadcost;
1907ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
1908ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1909ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
1910ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
1911ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1912ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride, 0x7fffffff) +
1913ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit);
1914ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1915ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  for (i = 0; i < search_range; i++) {
1916ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    int best_site = -1;
1917ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1918ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    for (j = 0; j < 4; j++) {
1919ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      this_row_offset = ref_mv->as_mv.row + neighbors[j].row;
1920ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      this_col_offset = ref_mv->as_mv.col + neighbors[j].col;
1921ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1922ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) &&
1923ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang          (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max)) {
1924ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        check_here = (neighbors[j].row) * in_what_stride + neighbors[j].col + best_address;
1925ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad);
1926ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1927ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        if (thissad < bestsad) {
1928ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang          this_mv.as_mv.row = this_row_offset;
1929ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang          this_mv.as_mv.col = this_col_offset;
1930ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang          thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvjsadcost,
1931ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                                    mvsadcost, error_per_bit);
1932ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1933ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang          if (thissad < bestsad) {
1934ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            bestsad = thissad;
1935ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            best_site = j;
1936ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang          }
1937ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        }
1938ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      }
1939ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    }
1940ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1941ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    if (best_site == -1)
1942ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      break;
1943ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    else {
1944ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      ref_mv->as_mv.row += neighbors[best_site].row;
1945ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      ref_mv->as_mv.col += neighbors[best_site].col;
1946ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      best_address += (neighbors[best_site].row) * in_what_stride + neighbors[best_site].col;
1947ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    }
1948ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  }
1949ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1950ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  this_mv.as_mv.row = ref_mv->as_mv.row << 3;
1951ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  this_mv.as_mv.col = ref_mv->as_mv.col << 3;
1952ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1953ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  if (bestsad < INT_MAX)
1954ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    return
1955ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        fn_ptr->vf(what, what_stride, best_address, in_what_stride,
1956ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                   (unsigned int *)(&thissad)) +
1957f3bed9137f66ef693bd406e43b17e9a1114f1e14hkuang        mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit);
1958ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  else
1959ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    return INT_MAX;
1960ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang}
1961ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1962ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuangint vp9_refining_search_sadx4(MACROBLOCK *x,
1963ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                              int_mv *ref_mv, int error_per_bit,
1964ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                              int search_range, vp9_variance_fn_ptr_t *fn_ptr,
1965ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                              int *mvjcost, int *mvcost[2], int_mv *center_mv) {
1966ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  const MACROBLOCKD* const xd = &x->e_mbd;
1967ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  MV neighbors[4] = {{ -1, 0}, {0, -1}, {0, 1}, {1, 0}};
1968ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int i, j;
1969ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int this_row_offset, this_col_offset;
1970ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1971ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int what_stride = x->plane[0].src.stride;
1972ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int in_what_stride = xd->plane[0].pre[0].stride;
1973ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  uint8_t *what = x->plane[0].src.buf;
1974ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  uint8_t *best_address = xd->plane[0].pre[0].buf +
1975ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                          (ref_mv->as_mv.row * xd->plane[0].pre[0].stride) +
1976ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                          ref_mv->as_mv.col;
1977ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  uint8_t *check_here;
1978ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  unsigned int thissad;
1979ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int_mv this_mv;
1980ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  unsigned int bestsad = INT_MAX;
1981ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int_mv fcenter_mv;
1982ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1983ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int *mvjsadcost = x->nmvjointsadcost;
1984ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
1985ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1986ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
1987ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
1988ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1989ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride, 0x7fffffff) +
1990ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit);
1991ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1992ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  for (i = 0; i < search_range; i++) {
1993ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    int best_site = -1;
1994ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    int all_in = ((ref_mv->as_mv.row - 1) > x->mv_row_min) &
1995ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                 ((ref_mv->as_mv.row + 1) < x->mv_row_max) &
1996ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                 ((ref_mv->as_mv.col - 1) > x->mv_col_min) &
1997ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                 ((ref_mv->as_mv.col + 1) < x->mv_col_max);
1998ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
1999ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    if (all_in) {
2000ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      unsigned int sad_array[4];
2001ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      unsigned char const *block_offset[4];
2002ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      block_offset[0] = best_address - in_what_stride;
2003ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      block_offset[1] = best_address - 1;
2004ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      block_offset[2] = best_address + 1;
2005ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      block_offset[3] = best_address + in_what_stride;
2006ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
2007ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, sad_array);
2008ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
2009ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      for (j = 0; j < 4; j++) {
2010ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        if (sad_array[j] < bestsad) {
2011ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang          this_mv.as_mv.row = ref_mv->as_mv.row + neighbors[j].row;
2012ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang          this_mv.as_mv.col = ref_mv->as_mv.col + neighbors[j].col;
2013ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang          sad_array[j] += mvsad_err_cost(&this_mv, &fcenter_mv, mvjsadcost,
2014ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                                         mvsadcost, error_per_bit);
2015ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
2016ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang          if (sad_array[j] < bestsad) {
2017ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            bestsad = sad_array[j];
2018ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            best_site = j;
2019ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang          }
2020ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        }
2021ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      }
2022ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    } else {
2023ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      for (j = 0; j < 4; j++) {
2024ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        this_row_offset = ref_mv->as_mv.row + neighbors[j].row;
2025ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        this_col_offset = ref_mv->as_mv.col + neighbors[j].col;
2026ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
2027ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) &&
2028ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max)) {
2029ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang          check_here = (neighbors[j].row) * in_what_stride + neighbors[j].col + best_address;
2030ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang          thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad);
2031ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
2032ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang          if (thissad < bestsad) {
2033ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            this_mv.as_mv.row = this_row_offset;
2034ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            this_mv.as_mv.col = this_col_offset;
2035ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvjsadcost,
2036ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                                      mvsadcost, error_per_bit);
2037ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
2038ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            if (thissad < bestsad) {
2039ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang              bestsad = thissad;
2040ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang              best_site = j;
2041ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            }
2042ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang          }
2043ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        }
2044ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      }
2045ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    }
2046ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
2047ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    if (best_site == -1)
2048ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      break;
2049ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    else {
2050ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      ref_mv->as_mv.row += neighbors[best_site].row;
2051ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      ref_mv->as_mv.col += neighbors[best_site].col;
2052ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      best_address += (neighbors[best_site].row) * in_what_stride + neighbors[best_site].col;
2053ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    }
2054ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  }
2055ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
2056ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  this_mv.as_mv.row = ref_mv->as_mv.row << 3;
2057ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  this_mv.as_mv.col = ref_mv->as_mv.col << 3;
2058ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
2059ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  if (bestsad < INT_MAX)
2060ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    return
2061ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        fn_ptr->vf(what, what_stride, best_address, in_what_stride,
2062ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                   (unsigned int *)(&thissad)) +
2063f3bed9137f66ef693bd406e43b17e9a1114f1e14hkuang        mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit);
2064ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  else
2065ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    return INT_MAX;
2066ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang}
2067ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
2068ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang/* This function is called when we do joint motion search in comp_inter_inter
2069ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang * mode.
2070ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang */
2071ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuangint vp9_refining_search_8p_c(MACROBLOCK *x,
2072ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                             int_mv *ref_mv, int error_per_bit,
2073ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                             int search_range, vp9_variance_fn_ptr_t *fn_ptr,
2074ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                             int *mvjcost, int *mvcost[2], int_mv *center_mv,
2075ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                             const uint8_t *second_pred, int w, int h) {
2076ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  const MACROBLOCKD* const xd = &x->e_mbd;
2077ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  MV neighbors[8] = {{-1, 0}, {0, -1}, {0, 1}, {1, 0},
2078ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      {-1, -1}, {1, -1}, {-1, 1}, {1, 1}};
2079ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int i, j;
2080ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int this_row_offset, this_col_offset;
2081ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
2082ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int what_stride = x->plane[0].src.stride;
2083ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int in_what_stride = xd->plane[0].pre[0].stride;
2084ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  uint8_t *what = x->plane[0].src.buf;
2085ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  uint8_t *best_address = xd->plane[0].pre[0].buf +
2086ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                          (ref_mv->as_mv.row * xd->plane[0].pre[0].stride) +
2087ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                          ref_mv->as_mv.col;
2088ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  uint8_t *check_here;
2089ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  unsigned int thissad;
2090ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int_mv this_mv;
2091ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  unsigned int bestsad = INT_MAX;
2092ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int_mv fcenter_mv;
2093ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
2094ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int *mvjsadcost = x->nmvjointsadcost;
2095ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
2096ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
2097ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
2098ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
2099ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
2100ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  /* Get compound pred by averaging two pred blocks. */
210191037db265ecdd914a26e056cf69207b4f50924ehkuang  bestsad = fn_ptr->sdaf(what, what_stride, best_address, in_what_stride,
210291037db265ecdd914a26e056cf69207b4f50924ehkuang                         second_pred, 0x7fffffff) +
2103ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit);
2104ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
2105ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  for (i = 0; i < search_range; i++) {
2106ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    int best_site = -1;
2107ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
2108ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    for (j = 0; j < 8; j++) {
2109ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      this_row_offset = ref_mv->as_mv.row + neighbors[j].row;
2110ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      this_col_offset = ref_mv->as_mv.col + neighbors[j].col;
2111ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
2112ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      if ((this_col_offset > x->mv_col_min) &&
2113ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang          (this_col_offset < x->mv_col_max) &&
2114ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang          (this_row_offset > x->mv_row_min) &&
2115ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang          (this_row_offset < x->mv_row_max)) {
2116ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        check_here = (neighbors[j].row) * in_what_stride + neighbors[j].col +
2117ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            best_address;
2118ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
2119ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        /* Get compound block and use it to calculate SAD. */
212091037db265ecdd914a26e056cf69207b4f50924ehkuang        thissad = fn_ptr->sdaf(what, what_stride, check_here, in_what_stride,
212191037db265ecdd914a26e056cf69207b4f50924ehkuang                               second_pred, bestsad);
2122ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
2123ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        if (thissad < bestsad) {
2124ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang          this_mv.as_mv.row = this_row_offset;
2125ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang          this_mv.as_mv.col = this_col_offset;
2126ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang          thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvjsadcost,
2127ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang                                    mvsadcost, error_per_bit);
2128ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
2129ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang          if (thissad < bestsad) {
2130ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            bestsad = thissad;
2131ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang            best_site = j;
2132ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang          }
2133ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang        }
2134ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      }
2135ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    }
2136ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
2137ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    if (best_site == -1) {
2138ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      break;
2139ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    } else {
2140ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      ref_mv->as_mv.row += neighbors[best_site].row;
2141ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      ref_mv->as_mv.col += neighbors[best_site].col;
2142ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang      best_address += (neighbors[best_site].row) * in_what_stride +
2143ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang          neighbors[best_site].col;
2144ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    }
2145ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  }
2146ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
2147ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  this_mv.as_mv.row = ref_mv->as_mv.row << 3;
2148ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  this_mv.as_mv.col = ref_mv->as_mv.col << 3;
2149ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang
2150ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  if (bestsad < INT_MAX) {
215191037db265ecdd914a26e056cf69207b4f50924ehkuang    // FIXME(rbultje, yunqing): add full-pixel averaging variance functions
215291037db265ecdd914a26e056cf69207b4f50924ehkuang    // so we don't have to use the subpixel with xoff=0,yoff=0 here.
2153f3bed9137f66ef693bd406e43b17e9a1114f1e14hkuang    return fn_ptr->svaf(best_address, in_what_stride, 0, 0,
215491037db265ecdd914a26e056cf69207b4f50924ehkuang                               what, what_stride, (unsigned int *)(&thissad),
215591037db265ecdd914a26e056cf69207b4f50924ehkuang                               second_pred) +
2156f3bed9137f66ef693bd406e43b17e9a1114f1e14hkuang        mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit);
2157ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  } else {
2158ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang    return INT_MAX;
2159ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang  }
2160ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang}
2161