1/* 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11#include <limits.h> 12#include <math.h> 13#include <stdio.h> 14 15#include "./vpx_config.h" 16 17#include "vpx_mem/vpx_mem.h" 18 19#include "vp9/common/vp9_common.h" 20 21#include "vp9/encoder/vp9_encoder.h" 22#include "vp9/encoder/vp9_mcomp.h" 23 24// #define NEW_DIAMOND_SEARCH 25 26static INLINE const uint8_t *get_buf_from_mv(const struct buf_2d *buf, 27 const MV *mv) { 28 return &buf->buf[mv->row * buf->stride + mv->col]; 29} 30 31void vp9_set_mv_search_range(MACROBLOCK *x, const MV *mv) { 32 int col_min = (mv->col >> 3) - MAX_FULL_PEL_VAL + (mv->col & 7 ? 1 : 0); 33 int row_min = (mv->row >> 3) - MAX_FULL_PEL_VAL + (mv->row & 7 ? 1 : 0); 34 int col_max = (mv->col >> 3) + MAX_FULL_PEL_VAL; 35 int row_max = (mv->row >> 3) + MAX_FULL_PEL_VAL; 36 37 col_min = MAX(col_min, (MV_LOW >> 3) + 1); 38 row_min = MAX(row_min, (MV_LOW >> 3) + 1); 39 col_max = MIN(col_max, (MV_UPP >> 3) - 1); 40 row_max = MIN(row_max, (MV_UPP >> 3) - 1); 41 42 // Get intersection of UMV window and valid MV window to reduce # of checks 43 // in diamond search. 44 if (x->mv_col_min < col_min) 45 x->mv_col_min = col_min; 46 if (x->mv_col_max > col_max) 47 x->mv_col_max = col_max; 48 if (x->mv_row_min < row_min) 49 x->mv_row_min = row_min; 50 if (x->mv_row_max > row_max) 51 x->mv_row_max = row_max; 52} 53 54int vp9_init_search_range(int size) { 55 int sr = 0; 56 // Minimum search size no matter what the passed in value. 57 size = MAX(16, size); 58 59 while ((size << sr) < MAX_FULL_PEL_VAL) 60 sr++; 61 62 sr = MIN(sr, MAX_MVSEARCH_STEPS - 2); 63 return sr; 64} 65 66static INLINE int mv_cost(const MV *mv, 67 const int *joint_cost, int *const comp_cost[2]) { 68 return joint_cost[vp9_get_mv_joint(mv)] + 69 comp_cost[0][mv->row] + comp_cost[1][mv->col]; 70} 71 72int vp9_mv_bit_cost(const MV *mv, const MV *ref, 73 const int *mvjcost, int *mvcost[2], int weight) { 74 const MV diff = { mv->row - ref->row, 75 mv->col - ref->col }; 76 return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjcost, mvcost) * weight, 7); 77} 78 79static int mv_err_cost(const MV *mv, const MV *ref, 80 const int *mvjcost, int *mvcost[2], 81 int error_per_bit) { 82 if (mvcost) { 83 const MV diff = { mv->row - ref->row, 84 mv->col - ref->col }; 85 return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjcost, mvcost) * 86 error_per_bit, 13); 87 } 88 return 0; 89} 90 91static int mvsad_err_cost(const MACROBLOCK *x, const MV *mv, const MV *ref, 92 int error_per_bit) { 93 if (x->nmvsadcost) { 94 const MV diff = { mv->row - ref->row, 95 mv->col - ref->col }; 96 return ROUND_POWER_OF_TWO(mv_cost(&diff, x->nmvjointsadcost, 97 x->nmvsadcost) * error_per_bit, 8); 98 } 99 return 0; 100} 101 102void vp9_init_dsmotion_compensation(search_site_config *cfg, int stride) { 103 int len, ss_count = 1; 104 105 cfg->ss[0].mv.col = cfg->ss[0].mv.row = 0; 106 cfg->ss[0].offset = 0; 107 108 for (len = MAX_FIRST_STEP; len > 0; len /= 2) { 109 // Generate offsets for 4 search sites per step. 110 const MV ss_mvs[] = {{-len, 0}, {len, 0}, {0, -len}, {0, len}}; 111 int i; 112 for (i = 0; i < 4; ++i) { 113 search_site *const ss = &cfg->ss[ss_count++]; 114 ss->mv = ss_mvs[i]; 115 ss->offset = ss->mv.row * stride + ss->mv.col; 116 } 117 } 118 119 cfg->ss_count = ss_count; 120 cfg->searches_per_step = 4; 121} 122 123void vp9_init3smotion_compensation(search_site_config *cfg, int stride) { 124 int len, ss_count = 1; 125 126 cfg->ss[0].mv.col = cfg->ss[0].mv.row = 0; 127 cfg->ss[0].offset = 0; 128 129 for (len = MAX_FIRST_STEP; len > 0; len /= 2) { 130 // Generate offsets for 8 search sites per step. 131 const MV ss_mvs[8] = { 132 {-len, 0 }, {len, 0 }, { 0, -len}, {0, len}, 133 {-len, -len}, {-len, len}, {len, -len}, {len, len} 134 }; 135 int i; 136 for (i = 0; i < 8; ++i) { 137 search_site *const ss = &cfg->ss[ss_count++]; 138 ss->mv = ss_mvs[i]; 139 ss->offset = ss->mv.row * stride + ss->mv.col; 140 } 141 } 142 143 cfg->ss_count = ss_count; 144 cfg->searches_per_step = 8; 145} 146 147/* 148 * To avoid the penalty for crossing cache-line read, preload the reference 149 * area in a small buffer, which is aligned to make sure there won't be crossing 150 * cache-line read while reading from this buffer. This reduced the cpu 151 * cycles spent on reading ref data in sub-pixel filter functions. 152 * TODO: Currently, since sub-pixel search range here is -3 ~ 3, copy 22 rows x 153 * 32 cols area that is enough for 16x16 macroblock. Later, for SPLITMV, we 154 * could reduce the area. 155 */ 156 157/* estimated cost of a motion vector (r,c) */ 158#define MVC(r, c) \ 159 (mvcost ? \ 160 ((mvjcost[((r) != rr) * 2 + ((c) != rc)] + \ 161 mvcost[0][((r) - rr)] + mvcost[1][((c) - rc)]) * \ 162 error_per_bit + 4096) >> 13 : 0) 163 164 165// convert motion vector component to offset for svf calc 166static INLINE int sp(int x) { 167 return (x & 7) << 1; 168} 169 170static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) { 171 return &buf[(r >> 3) * stride + (c >> 3)]; 172} 173 174/* checks if (r, c) has better score than previous best */ 175#define CHECK_BETTER(v, r, c) \ 176 if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \ 177 if (second_pred == NULL) \ 178 thismse = vfp->svf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z, \ 179 src_stride, &sse); \ 180 else \ 181 thismse = vfp->svaf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), \ 182 z, src_stride, &sse, second_pred); \ 183 if ((v = MVC(r, c) + thismse) < besterr) { \ 184 besterr = v; \ 185 br = r; \ 186 bc = c; \ 187 *distortion = thismse; \ 188 *sse1 = sse; \ 189 } \ 190 } else { \ 191 v = INT_MAX; \ 192 } 193 194#define FIRST_LEVEL_CHECKS \ 195 { \ 196 unsigned int left, right, up, down, diag; \ 197 CHECK_BETTER(left, tr, tc - hstep); \ 198 CHECK_BETTER(right, tr, tc + hstep); \ 199 CHECK_BETTER(up, tr - hstep, tc); \ 200 CHECK_BETTER(down, tr + hstep, tc); \ 201 whichdir = (left < right ? 0 : 1) + \ 202 (up < down ? 0 : 2); \ 203 switch (whichdir) { \ 204 case 0: \ 205 CHECK_BETTER(diag, tr - hstep, tc - hstep); \ 206 break; \ 207 case 1: \ 208 CHECK_BETTER(diag, tr - hstep, tc + hstep); \ 209 break; \ 210 case 2: \ 211 CHECK_BETTER(diag, tr + hstep, tc - hstep); \ 212 break; \ 213 case 3: \ 214 CHECK_BETTER(diag, tr + hstep, tc + hstep); \ 215 break; \ 216 } \ 217 } 218 219#define SECOND_LEVEL_CHECKS \ 220 { \ 221 int kr, kc; \ 222 unsigned int second; \ 223 if (tr != br && tc != bc) { \ 224 kr = br - tr; \ 225 kc = bc - tc; \ 226 CHECK_BETTER(second, tr + kr, tc + 2 * kc); \ 227 CHECK_BETTER(second, tr + 2 * kr, tc + kc); \ 228 } else if (tr == br && tc != bc) { \ 229 kc = bc - tc; \ 230 CHECK_BETTER(second, tr + hstep, tc + 2 * kc); \ 231 CHECK_BETTER(second, tr - hstep, tc + 2 * kc); \ 232 switch (whichdir) { \ 233 case 0: \ 234 case 1: \ 235 CHECK_BETTER(second, tr + hstep, tc + kc); \ 236 break; \ 237 case 2: \ 238 case 3: \ 239 CHECK_BETTER(second, tr - hstep, tc + kc); \ 240 break; \ 241 } \ 242 } else if (tr != br && tc == bc) { \ 243 kr = br - tr; \ 244 CHECK_BETTER(second, tr + 2 * kr, tc + hstep); \ 245 CHECK_BETTER(second, tr + 2 * kr, tc - hstep); \ 246 switch (whichdir) { \ 247 case 0: \ 248 case 2: \ 249 CHECK_BETTER(second, tr + kr, tc + hstep); \ 250 break; \ 251 case 1: \ 252 case 3: \ 253 CHECK_BETTER(second, tr + kr, tc - hstep); \ 254 break; \ 255 } \ 256 } \ 257 } 258 259int vp9_find_best_sub_pixel_tree(const MACROBLOCK *x, 260 MV *bestmv, const MV *ref_mv, 261 int allow_hp, 262 int error_per_bit, 263 const vp9_variance_fn_ptr_t *vfp, 264 int forced_stop, 265 int iters_per_step, 266 int *mvjcost, int *mvcost[2], 267 int *distortion, 268 unsigned int *sse1, 269 const uint8_t *second_pred, 270 int w, int h) { 271 const uint8_t *const z = x->plane[0].src.buf; 272 const int src_stride = x->plane[0].src.stride; 273 const MACROBLOCKD *xd = &x->e_mbd; 274 unsigned int besterr = INT_MAX; 275 unsigned int sse; 276 unsigned int whichdir; 277 int thismse; 278 const unsigned int halfiters = iters_per_step; 279 const unsigned int quarteriters = iters_per_step; 280 const unsigned int eighthiters = iters_per_step; 281 282 const int y_stride = xd->plane[0].pre[0].stride; 283 const int offset = bestmv->row * y_stride + bestmv->col; 284 const uint8_t *const y = xd->plane[0].pre[0].buf; 285 286 int rr = ref_mv->row; 287 int rc = ref_mv->col; 288 int br = bestmv->row * 8; 289 int bc = bestmv->col * 8; 290 int hstep = 4; 291 const int minc = MAX(x->mv_col_min * 8, ref_mv->col - MV_MAX); 292 const int maxc = MIN(x->mv_col_max * 8, ref_mv->col + MV_MAX); 293 const int minr = MAX(x->mv_row_min * 8, ref_mv->row - MV_MAX); 294 const int maxr = MIN(x->mv_row_max * 8, ref_mv->row + MV_MAX); 295 296 int tr = br; 297 int tc = bc; 298 299 // central mv 300 bestmv->row *= 8; 301 bestmv->col *= 8; 302 303 // calculate central point error 304 // TODO(yunqingwang): central pointer error was already calculated in full- 305 // pixel search, and can be passed in this function. 306 if (second_pred != NULL) { 307 DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64); 308 vp9_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride); 309 besterr = vfp->vf(comp_pred, w, z, src_stride, sse1); 310 } else { 311 besterr = vfp->vf(y + offset, y_stride, z, src_stride, sse1); 312 } 313 *distortion = besterr; 314 besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit); 315 316 // Each subsequent iteration checks at least one point in 317 // common with the last iteration could be 2 ( if diag selected) 318 // 1/2 pel 319 FIRST_LEVEL_CHECKS; 320 if (halfiters > 1) { 321 SECOND_LEVEL_CHECKS; 322 } 323 tr = br; 324 tc = bc; 325 326 // Each subsequent iteration checks at least one point in common with 327 // the last iteration could be 2 ( if diag selected) 1/4 pel 328 329 // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only 330 if (forced_stop != 2) { 331 hstep >>= 1; 332 FIRST_LEVEL_CHECKS; 333 if (quarteriters > 1) { 334 SECOND_LEVEL_CHECKS; 335 } 336 tr = br; 337 tc = bc; 338 } 339 340 if (allow_hp && vp9_use_mv_hp(ref_mv) && forced_stop == 0) { 341 hstep >>= 1; 342 FIRST_LEVEL_CHECKS; 343 if (eighthiters > 1) { 344 SECOND_LEVEL_CHECKS; 345 } 346 tr = br; 347 tc = bc; 348 } 349 // These lines insure static analysis doesn't warn that 350 // tr and tc aren't used after the above point. 351 (void) tr; 352 (void) tc; 353 354 bestmv->row = br; 355 bestmv->col = bc; 356 357 if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) || 358 (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3))) 359 return INT_MAX; 360 361 return besterr; 362} 363 364#undef MVC 365#undef PRE 366#undef CHECK_BETTER 367 368static INLINE int check_bounds(const MACROBLOCK *x, int row, int col, 369 int range) { 370 return ((row - range) >= x->mv_row_min) & 371 ((row + range) <= x->mv_row_max) & 372 ((col - range) >= x->mv_col_min) & 373 ((col + range) <= x->mv_col_max); 374} 375 376static INLINE int is_mv_in(const MACROBLOCK *x, const MV *mv) { 377 return (mv->col >= x->mv_col_min) && (mv->col <= x->mv_col_max) && 378 (mv->row >= x->mv_row_min) && (mv->row <= x->mv_row_max); 379} 380 381#define CHECK_BETTER \ 382 {\ 383 if (thissad < bestsad) {\ 384 if (use_mvcost) \ 385 thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);\ 386 if (thissad < bestsad) {\ 387 bestsad = thissad;\ 388 best_site = i;\ 389 }\ 390 }\ 391 } 392 393#define MAX_PATTERN_SCALES 11 394#define MAX_PATTERN_CANDIDATES 8 // max number of canddiates per scale 395#define PATTERN_CANDIDATES_REF 3 // number of refinement candidates 396 397// Generic pattern search function that searches over multiple scales. 398// Each scale can have a different number of candidates and shape of 399// candidates as indicated in the num_candidates and candidates arrays 400// passed into this function 401static int vp9_pattern_search(const MACROBLOCK *x, 402 MV *ref_mv, 403 int search_param, 404 int sad_per_bit, 405 int do_init_search, int do_refine, 406 const vp9_variance_fn_ptr_t *vfp, 407 int use_mvcost, 408 const MV *center_mv, MV *best_mv, 409 const int num_candidates[MAX_PATTERN_SCALES], 410 const MV candidates[MAX_PATTERN_SCALES] 411 [MAX_PATTERN_CANDIDATES]) { 412 const MACROBLOCKD *const xd = &x->e_mbd; 413 static const int search_param_to_steps[MAX_MVSEARCH_STEPS] = { 414 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 415 }; 416 int i, j, s, t; 417 const struct buf_2d *const what = &x->plane[0].src; 418 const struct buf_2d *const in_what = &xd->plane[0].pre[0]; 419 int br, bc; 420 int bestsad = INT_MAX; 421 int thissad; 422 int k = -1; 423 const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; 424 int best_init_s = search_param_to_steps[search_param]; 425 // adjust ref_mv to make sure it is within MV range 426 clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); 427 br = ref_mv->row; 428 bc = ref_mv->col; 429 430 // Work out the start point for the search 431 bestsad = vfp->sdf(what->buf, what->stride, 432 get_buf_from_mv(in_what, ref_mv), in_what->stride) + 433 mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit); 434 435 // Search all possible scales upto the search param around the center point 436 // pick the scale of the point that is best as the starting scale of 437 // further steps around it. 438 if (do_init_search) { 439 s = best_init_s; 440 best_init_s = -1; 441 for (t = 0; t <= s; ++t) { 442 int best_site = -1; 443 if (check_bounds(x, br, bc, 1 << t)) { 444 for (i = 0; i < num_candidates[t]; i++) { 445 const MV this_mv = {br + candidates[t][i].row, 446 bc + candidates[t][i].col}; 447 thissad = vfp->sdf(what->buf, what->stride, 448 get_buf_from_mv(in_what, &this_mv), 449 in_what->stride); 450 CHECK_BETTER 451 } 452 } else { 453 for (i = 0; i < num_candidates[t]; i++) { 454 const MV this_mv = {br + candidates[t][i].row, 455 bc + candidates[t][i].col}; 456 if (!is_mv_in(x, &this_mv)) 457 continue; 458 thissad = vfp->sdf(what->buf, what->stride, 459 get_buf_from_mv(in_what, &this_mv), 460 in_what->stride); 461 CHECK_BETTER 462 } 463 } 464 if (best_site == -1) { 465 continue; 466 } else { 467 best_init_s = t; 468 k = best_site; 469 } 470 } 471 if (best_init_s != -1) { 472 br += candidates[best_init_s][k].row; 473 bc += candidates[best_init_s][k].col; 474 } 475 } 476 477 // If the center point is still the best, just skip this and move to 478 // the refinement step. 479 if (best_init_s != -1) { 480 int best_site = -1; 481 s = best_init_s; 482 483 do { 484 // No need to search all 6 points the 1st time if initial search was used 485 if (!do_init_search || s != best_init_s) { 486 if (check_bounds(x, br, bc, 1 << s)) { 487 for (i = 0; i < num_candidates[s]; i++) { 488 const MV this_mv = {br + candidates[s][i].row, 489 bc + candidates[s][i].col}; 490 thissad = vfp->sdf(what->buf, what->stride, 491 get_buf_from_mv(in_what, &this_mv), 492 in_what->stride); 493 CHECK_BETTER 494 } 495 } else { 496 for (i = 0; i < num_candidates[s]; i++) { 497 const MV this_mv = {br + candidates[s][i].row, 498 bc + candidates[s][i].col}; 499 if (!is_mv_in(x, &this_mv)) 500 continue; 501 thissad = vfp->sdf(what->buf, what->stride, 502 get_buf_from_mv(in_what, &this_mv), 503 in_what->stride); 504 CHECK_BETTER 505 } 506 } 507 508 if (best_site == -1) { 509 continue; 510 } else { 511 br += candidates[s][best_site].row; 512 bc += candidates[s][best_site].col; 513 k = best_site; 514 } 515 } 516 517 do { 518 int next_chkpts_indices[PATTERN_CANDIDATES_REF]; 519 best_site = -1; 520 next_chkpts_indices[0] = (k == 0) ? num_candidates[s] - 1 : k - 1; 521 next_chkpts_indices[1] = k; 522 next_chkpts_indices[2] = (k == num_candidates[s] - 1) ? 0 : k + 1; 523 524 if (check_bounds(x, br, bc, 1 << s)) { 525 for (i = 0; i < PATTERN_CANDIDATES_REF; i++) { 526 const MV this_mv = {br + candidates[s][next_chkpts_indices[i]].row, 527 bc + candidates[s][next_chkpts_indices[i]].col}; 528 thissad = vfp->sdf(what->buf, what->stride, 529 get_buf_from_mv(in_what, &this_mv), 530 in_what->stride); 531 CHECK_BETTER 532 } 533 } else { 534 for (i = 0; i < PATTERN_CANDIDATES_REF; i++) { 535 const MV this_mv = {br + candidates[s][next_chkpts_indices[i]].row, 536 bc + candidates[s][next_chkpts_indices[i]].col}; 537 if (!is_mv_in(x, &this_mv)) 538 continue; 539 thissad = vfp->sdf(what->buf, what->stride, 540 get_buf_from_mv(in_what, &this_mv), 541 in_what->stride); 542 CHECK_BETTER 543 } 544 } 545 546 if (best_site != -1) { 547 k = next_chkpts_indices[best_site]; 548 br += candidates[s][k].row; 549 bc += candidates[s][k].col; 550 } 551 } while (best_site != -1); 552 } while (s--); 553 } 554 555 // Check 4 1-away neighbors if do_refine is true. 556 // For most well-designed schemes do_refine will not be necessary. 557 if (do_refine) { 558 static const MV neighbors[4] = {{0, -1}, { -1, 0}, {1, 0}, {0, 1}}; 559 560 for (j = 0; j < 16; j++) { 561 int best_site = -1; 562 if (check_bounds(x, br, bc, 1)) { 563 for (i = 0; i < 4; i++) { 564 const MV this_mv = {br + neighbors[i].row, 565 bc + neighbors[i].col}; 566 thissad = vfp->sdf(what->buf, what->stride, 567 get_buf_from_mv(in_what, &this_mv), 568 in_what->stride); 569 CHECK_BETTER 570 } 571 } else { 572 for (i = 0; i < 4; i++) { 573 const MV this_mv = {br + neighbors[i].row, 574 bc + neighbors[i].col}; 575 if (!is_mv_in(x, &this_mv)) 576 continue; 577 thissad = vfp->sdf(what->buf, what->stride, 578 get_buf_from_mv(in_what, &this_mv), 579 in_what->stride); 580 CHECK_BETTER 581 } 582 } 583 584 if (best_site == -1) { 585 break; 586 } else { 587 br += neighbors[best_site].row; 588 bc += neighbors[best_site].col; 589 } 590 } 591 } 592 593 best_mv->row = br; 594 best_mv->col = bc; 595 596 return bestsad; 597} 598 599int vp9_get_mvpred_var(const MACROBLOCK *x, 600 const MV *best_mv, const MV *center_mv, 601 const vp9_variance_fn_ptr_t *vfp, 602 int use_mvcost) { 603 const MACROBLOCKD *const xd = &x->e_mbd; 604 const struct buf_2d *const what = &x->plane[0].src; 605 const struct buf_2d *const in_what = &xd->plane[0].pre[0]; 606 const MV mv = {best_mv->row * 8, best_mv->col * 8}; 607 unsigned int unused; 608 609 return vfp->vf(what->buf, what->stride, 610 get_buf_from_mv(in_what, best_mv), in_what->stride, &unused) + 611 (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost, 612 x->mvcost, x->errorperbit) : 0); 613} 614 615int vp9_get_mvpred_av_var(const MACROBLOCK *x, 616 const MV *best_mv, const MV *center_mv, 617 const uint8_t *second_pred, 618 const vp9_variance_fn_ptr_t *vfp, 619 int use_mvcost) { 620 const MACROBLOCKD *const xd = &x->e_mbd; 621 const struct buf_2d *const what = &x->plane[0].src; 622 const struct buf_2d *const in_what = &xd->plane[0].pre[0]; 623 const MV mv = {best_mv->row * 8, best_mv->col * 8}; 624 unsigned int unused; 625 626 return vfp->svaf(get_buf_from_mv(in_what, best_mv), in_what->stride, 0, 0, 627 what->buf, what->stride, &unused, second_pred) + 628 (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost, 629 x->mvcost, x->errorperbit) : 0); 630} 631 632int vp9_hex_search(const MACROBLOCK *x, 633 MV *ref_mv, 634 int search_param, 635 int sad_per_bit, 636 int do_init_search, 637 const vp9_variance_fn_ptr_t *vfp, 638 int use_mvcost, 639 const MV *center_mv, MV *best_mv) { 640 // First scale has 8-closest points, the rest have 6 points in hex shape 641 // at increasing scales 642 static const int hex_num_candidates[MAX_PATTERN_SCALES] = { 643 8, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6 644 }; 645 // Note that the largest candidate step at each scale is 2^scale 646 static const MV hex_candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES] = { 647 {{-1, -1}, {0, -1}, {1, -1}, {1, 0}, {1, 1}, { 0, 1}, { -1, 1}, {-1, 0}}, 648 {{-1, -2}, {1, -2}, {2, 0}, {1, 2}, { -1, 2}, { -2, 0}}, 649 {{-2, -4}, {2, -4}, {4, 0}, {2, 4}, { -2, 4}, { -4, 0}}, 650 {{-4, -8}, {4, -8}, {8, 0}, {4, 8}, { -4, 8}, { -8, 0}}, 651 {{-8, -16}, {8, -16}, {16, 0}, {8, 16}, { -8, 16}, { -16, 0}}, 652 {{-16, -32}, {16, -32}, {32, 0}, {16, 32}, { -16, 32}, { -32, 0}}, 653 {{-32, -64}, {32, -64}, {64, 0}, {32, 64}, { -32, 64}, { -64, 0}}, 654 {{-64, -128}, {64, -128}, {128, 0}, {64, 128}, { -64, 128}, { -128, 0}}, 655 {{-128, -256}, {128, -256}, {256, 0}, {128, 256}, { -128, 256}, { -256, 0}}, 656 {{-256, -512}, {256, -512}, {512, 0}, {256, 512}, { -256, 512}, { -512, 0}}, 657 {{-512, -1024}, {512, -1024}, {1024, 0}, {512, 1024}, { -512, 1024}, 658 { -1024, 0}}, 659 }; 660 return vp9_pattern_search(x, ref_mv, search_param, sad_per_bit, 661 do_init_search, 0, vfp, use_mvcost, 662 center_mv, best_mv, 663 hex_num_candidates, hex_candidates); 664} 665 666int vp9_bigdia_search(const MACROBLOCK *x, 667 MV *ref_mv, 668 int search_param, 669 int sad_per_bit, 670 int do_init_search, 671 const vp9_variance_fn_ptr_t *vfp, 672 int use_mvcost, 673 const MV *center_mv, 674 MV *best_mv) { 675 // First scale has 4-closest points, the rest have 8 points in diamond 676 // shape at increasing scales 677 static const int bigdia_num_candidates[MAX_PATTERN_SCALES] = { 678 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 679 }; 680 // Note that the largest candidate step at each scale is 2^scale 681 static const MV bigdia_candidates[MAX_PATTERN_SCALES] 682 [MAX_PATTERN_CANDIDATES] = { 683 {{0, -1}, {1, 0}, { 0, 1}, {-1, 0}}, 684 {{-1, -1}, {0, -2}, {1, -1}, {2, 0}, {1, 1}, {0, 2}, {-1, 1}, {-2, 0}}, 685 {{-2, -2}, {0, -4}, {2, -2}, {4, 0}, {2, 2}, {0, 4}, {-2, 2}, {-4, 0}}, 686 {{-4, -4}, {0, -8}, {4, -4}, {8, 0}, {4, 4}, {0, 8}, {-4, 4}, {-8, 0}}, 687 {{-8, -8}, {0, -16}, {8, -8}, {16, 0}, {8, 8}, {0, 16}, {-8, 8}, {-16, 0}}, 688 {{-16, -16}, {0, -32}, {16, -16}, {32, 0}, {16, 16}, {0, 32}, 689 {-16, 16}, {-32, 0}}, 690 {{-32, -32}, {0, -64}, {32, -32}, {64, 0}, {32, 32}, {0, 64}, 691 {-32, 32}, {-64, 0}}, 692 {{-64, -64}, {0, -128}, {64, -64}, {128, 0}, {64, 64}, {0, 128}, 693 {-64, 64}, {-128, 0}}, 694 {{-128, -128}, {0, -256}, {128, -128}, {256, 0}, {128, 128}, {0, 256}, 695 {-128, 128}, {-256, 0}}, 696 {{-256, -256}, {0, -512}, {256, -256}, {512, 0}, {256, 256}, {0, 512}, 697 {-256, 256}, {-512, 0}}, 698 {{-512, -512}, {0, -1024}, {512, -512}, {1024, 0}, {512, 512}, {0, 1024}, 699 {-512, 512}, {-1024, 0}}, 700 }; 701 return vp9_pattern_search(x, ref_mv, search_param, sad_per_bit, 702 do_init_search, 0, vfp, use_mvcost, 703 center_mv, best_mv, 704 bigdia_num_candidates, bigdia_candidates); 705} 706 707int vp9_square_search(const MACROBLOCK *x, 708 MV *ref_mv, 709 int search_param, 710 int sad_per_bit, 711 int do_init_search, 712 const vp9_variance_fn_ptr_t *vfp, 713 int use_mvcost, 714 const MV *center_mv, 715 MV *best_mv) { 716 // All scales have 8 closest points in square shape 717 static const int square_num_candidates[MAX_PATTERN_SCALES] = { 718 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 719 }; 720 // Note that the largest candidate step at each scale is 2^scale 721 static const MV square_candidates[MAX_PATTERN_SCALES] 722 [MAX_PATTERN_CANDIDATES] = { 723 {{-1, -1}, {0, -1}, {1, -1}, {1, 0}, {1, 1}, {0, 1}, {-1, 1}, {-1, 0}}, 724 {{-2, -2}, {0, -2}, {2, -2}, {2, 0}, {2, 2}, {0, 2}, {-2, 2}, {-2, 0}}, 725 {{-4, -4}, {0, -4}, {4, -4}, {4, 0}, {4, 4}, {0, 4}, {-4, 4}, {-4, 0}}, 726 {{-8, -8}, {0, -8}, {8, -8}, {8, 0}, {8, 8}, {0, 8}, {-8, 8}, {-8, 0}}, 727 {{-16, -16}, {0, -16}, {16, -16}, {16, 0}, {16, 16}, {0, 16}, 728 {-16, 16}, {-16, 0}}, 729 {{-32, -32}, {0, -32}, {32, -32}, {32, 0}, {32, 32}, {0, 32}, 730 {-32, 32}, {-32, 0}}, 731 {{-64, -64}, {0, -64}, {64, -64}, {64, 0}, {64, 64}, {0, 64}, 732 {-64, 64}, {-64, 0}}, 733 {{-128, -128}, {0, -128}, {128, -128}, {128, 0}, {128, 128}, {0, 128}, 734 {-128, 128}, {-128, 0}}, 735 {{-256, -256}, {0, -256}, {256, -256}, {256, 0}, {256, 256}, {0, 256}, 736 {-256, 256}, {-256, 0}}, 737 {{-512, -512}, {0, -512}, {512, -512}, {512, 0}, {512, 512}, {0, 512}, 738 {-512, 512}, {-512, 0}}, 739 {{-1024, -1024}, {0, -1024}, {1024, -1024}, {1024, 0}, {1024, 1024}, 740 {0, 1024}, {-1024, 1024}, {-1024, 0}}, 741 }; 742 return vp9_pattern_search(x, ref_mv, search_param, sad_per_bit, 743 do_init_search, 0, vfp, use_mvcost, 744 center_mv, best_mv, 745 square_num_candidates, square_candidates); 746} 747 748int vp9_fast_hex_search(const MACROBLOCK *x, 749 MV *ref_mv, 750 int search_param, 751 int sad_per_bit, 752 int do_init_search, // must be zero for fast_hex 753 const vp9_variance_fn_ptr_t *vfp, 754 int use_mvcost, 755 const MV *center_mv, 756 MV *best_mv) { 757 return vp9_hex_search(x, ref_mv, MAX(MAX_MVSEARCH_STEPS - 2, search_param), 758 sad_per_bit, do_init_search, vfp, use_mvcost, 759 center_mv, best_mv); 760} 761 762int vp9_fast_dia_search(const MACROBLOCK *x, 763 MV *ref_mv, 764 int search_param, 765 int sad_per_bit, 766 int do_init_search, 767 const vp9_variance_fn_ptr_t *vfp, 768 int use_mvcost, 769 const MV *center_mv, 770 MV *best_mv) { 771 return vp9_bigdia_search(x, ref_mv, MAX(MAX_MVSEARCH_STEPS - 2, search_param), 772 sad_per_bit, do_init_search, vfp, use_mvcost, 773 center_mv, best_mv); 774} 775 776#undef CHECK_BETTER 777 778int vp9_full_range_search_c(const MACROBLOCK *x, 779 const search_site_config *cfg, 780 MV *ref_mv, MV *best_mv, 781 int search_param, int sad_per_bit, int *num00, 782 const vp9_variance_fn_ptr_t *fn_ptr, 783 const MV *center_mv) { 784 const MACROBLOCKD *const xd = &x->e_mbd; 785 const struct buf_2d *const what = &x->plane[0].src; 786 const struct buf_2d *const in_what = &xd->plane[0].pre[0]; 787 const int range = 64; 788 const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; 789 unsigned int best_sad = INT_MAX; 790 int r, c, i; 791 int start_col, end_col, start_row, end_row; 792 793 // The cfg and search_param parameters are not used in this search variant 794 (void)cfg; 795 (void)search_param; 796 797 clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); 798 *best_mv = *ref_mv; 799 *num00 = 11; 800 best_sad = fn_ptr->sdf(what->buf, what->stride, 801 get_buf_from_mv(in_what, ref_mv), in_what->stride) + 802 mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit); 803 start_row = MAX(-range, x->mv_row_min - ref_mv->row); 804 start_col = MAX(-range, x->mv_col_min - ref_mv->col); 805 end_row = MIN(range, x->mv_row_max - ref_mv->row); 806 end_col = MIN(range, x->mv_col_max - ref_mv->col); 807 808 for (r = start_row; r <= end_row; ++r) { 809 for (c = start_col; c <= end_col; c += 4) { 810 if (c + 3 <= end_col) { 811 unsigned int sads[4]; 812 const uint8_t *addrs[4]; 813 for (i = 0; i < 4; ++i) { 814 const MV mv = {ref_mv->row + r, ref_mv->col + c + i}; 815 addrs[i] = get_buf_from_mv(in_what, &mv); 816 } 817 818 fn_ptr->sdx4df(what->buf, what->stride, addrs, in_what->stride, sads); 819 820 for (i = 0; i < 4; ++i) { 821 if (sads[i] < best_sad) { 822 const MV mv = {ref_mv->row + r, ref_mv->col + c + i}; 823 const unsigned int sad = sads[i] + 824 mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit); 825 if (sad < best_sad) { 826 best_sad = sad; 827 *best_mv = mv; 828 } 829 } 830 } 831 } else { 832 for (i = 0; i < end_col - c; ++i) { 833 const MV mv = {ref_mv->row + r, ref_mv->col + c + i}; 834 unsigned int sad = fn_ptr->sdf(what->buf, what->stride, 835 get_buf_from_mv(in_what, &mv), in_what->stride); 836 if (sad < best_sad) { 837 sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit); 838 if (sad < best_sad) { 839 best_sad = sad; 840 *best_mv = mv; 841 } 842 } 843 } 844 } 845 } 846 } 847 848 return best_sad; 849} 850 851int vp9_diamond_search_sad_c(const MACROBLOCK *x, 852 const search_site_config *cfg, 853 MV *ref_mv, MV *best_mv, int search_param, 854 int sad_per_bit, int *num00, 855 const vp9_variance_fn_ptr_t *fn_ptr, 856 const MV *center_mv) { 857 int i, j, step; 858 859 const MACROBLOCKD *const xd = &x->e_mbd; 860 uint8_t *what = x->plane[0].src.buf; 861 const int what_stride = x->plane[0].src.stride; 862 const uint8_t *in_what; 863 const int in_what_stride = xd->plane[0].pre[0].stride; 864 const uint8_t *best_address; 865 866 unsigned int bestsad = INT_MAX; 867 int best_site = 0; 868 int last_site = 0; 869 870 int ref_row; 871 int ref_col; 872 873 // search_param determines the length of the initial step and hence the number 874 // of iterations. 875 // 0 = initial step (MAX_FIRST_STEP) pel 876 // 1 = (MAX_FIRST_STEP/2) pel, 877 // 2 = (MAX_FIRST_STEP/4) pel... 878 const search_site *ss = &cfg->ss[search_param * cfg->searches_per_step]; 879 const int tot_steps = (cfg->ss_count / cfg->searches_per_step) - search_param; 880 881 const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; 882 clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); 883 ref_row = ref_mv->row; 884 ref_col = ref_mv->col; 885 *num00 = 0; 886 best_mv->row = ref_row; 887 best_mv->col = ref_col; 888 889 // Work out the start point for the search 890 in_what = xd->plane[0].pre[0].buf + ref_row * in_what_stride + ref_col; 891 best_address = in_what; 892 893 // Check the starting position 894 bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride) 895 + mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit); 896 897 i = 1; 898 899 for (step = 0; step < tot_steps; step++) { 900 int all_in = 1, t; 901 902 // All_in is true if every one of the points we are checking are within 903 // the bounds of the image. 904 all_in &= ((best_mv->row + ss[i].mv.row) > x->mv_row_min); 905 all_in &= ((best_mv->row + ss[i + 1].mv.row) < x->mv_row_max); 906 all_in &= ((best_mv->col + ss[i + 2].mv.col) > x->mv_col_min); 907 all_in &= ((best_mv->col + ss[i + 3].mv.col) < x->mv_col_max); 908 909 // If all the pixels are within the bounds we don't check whether the 910 // search point is valid in this loop, otherwise we check each point 911 // for validity.. 912 if (all_in) { 913 unsigned int sad_array[4]; 914 915 for (j = 0; j < cfg->searches_per_step; j += 4) { 916 unsigned char const *block_offset[4]; 917 918 for (t = 0; t < 4; t++) 919 block_offset[t] = ss[i + t].offset + best_address; 920 921 fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, 922 sad_array); 923 924 for (t = 0; t < 4; t++, i++) { 925 if (sad_array[t] < bestsad) { 926 const MV this_mv = {best_mv->row + ss[i].mv.row, 927 best_mv->col + ss[i].mv.col}; 928 sad_array[t] += mvsad_err_cost(x, &this_mv, &fcenter_mv, 929 sad_per_bit); 930 if (sad_array[t] < bestsad) { 931 bestsad = sad_array[t]; 932 best_site = i; 933 } 934 } 935 } 936 } 937 } else { 938 for (j = 0; j < cfg->searches_per_step; j++) { 939 // Trap illegal vectors 940 const MV this_mv = {best_mv->row + ss[i].mv.row, 941 best_mv->col + ss[i].mv.col}; 942 943 if (is_mv_in(x, &this_mv)) { 944 const uint8_t *const check_here = ss[i].offset + best_address; 945 unsigned int thissad = fn_ptr->sdf(what, what_stride, check_here, 946 in_what_stride); 947 948 if (thissad < bestsad) { 949 thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); 950 if (thissad < bestsad) { 951 bestsad = thissad; 952 best_site = i; 953 } 954 } 955 } 956 i++; 957 } 958 } 959 if (best_site != last_site) { 960 best_mv->row += ss[best_site].mv.row; 961 best_mv->col += ss[best_site].mv.col; 962 best_address += ss[best_site].offset; 963 last_site = best_site; 964#if defined(NEW_DIAMOND_SEARCH) 965 while (1) { 966 const MV this_mv = {best_mv->row + ss[best_site].mv.row, 967 best_mv->col + ss[best_site].mv.col}; 968 if (is_mv_in(x, &this_mv)) { 969 const uint8_t *const check_here = ss[best_site].offset + best_address; 970 unsigned int thissad = fn_ptr->sdf(what, what_stride, check_here, 971 in_what_stride); 972 if (thissad < bestsad) { 973 thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); 974 if (thissad < bestsad) { 975 bestsad = thissad; 976 best_mv->row += ss[best_site].mv.row; 977 best_mv->col += ss[best_site].mv.col; 978 best_address += ss[best_site].offset; 979 continue; 980 } 981 } 982 } 983 break; 984 }; 985#endif 986 } else if (best_address == in_what) { 987 (*num00)++; 988 } 989 } 990 return bestsad; 991} 992 993/* do_refine: If last step (1-away) of n-step search doesn't pick the center 994 point as the best match, we will do a final 1-away diamond 995 refining search */ 996 997int vp9_full_pixel_diamond(const VP9_COMP *cpi, MACROBLOCK *x, 998 MV *mvp_full, int step_param, 999 int sadpb, int further_steps, int do_refine, 1000 const vp9_variance_fn_ptr_t *fn_ptr, 1001 const MV *ref_mv, MV *dst_mv) { 1002 MV temp_mv; 1003 int thissme, n, num00 = 0; 1004 int bestsme = cpi->diamond_search_sad(x, &cpi->ss_cfg, mvp_full, &temp_mv, 1005 step_param, sadpb, &n, 1006 fn_ptr, ref_mv); 1007 if (bestsme < INT_MAX) 1008 bestsme = vp9_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1); 1009 *dst_mv = temp_mv; 1010 1011 // If there won't be more n-step search, check to see if refining search is 1012 // needed. 1013 if (n > further_steps) 1014 do_refine = 0; 1015 1016 while (n < further_steps) { 1017 ++n; 1018 1019 if (num00) { 1020 num00--; 1021 } else { 1022 thissme = cpi->diamond_search_sad(x, &cpi->ss_cfg, mvp_full, &temp_mv, 1023 step_param + n, sadpb, &num00, 1024 fn_ptr, ref_mv); 1025 if (thissme < INT_MAX) 1026 thissme = vp9_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1); 1027 1028 // check to see if refining search is needed. 1029 if (num00 > further_steps - n) 1030 do_refine = 0; 1031 1032 if (thissme < bestsme) { 1033 bestsme = thissme; 1034 *dst_mv = temp_mv; 1035 } 1036 } 1037 } 1038 1039 // final 1-away diamond refining search 1040 if (do_refine) { 1041 const int search_range = 8; 1042 MV best_mv = *dst_mv; 1043 thissme = cpi->refining_search_sad(x, &best_mv, sadpb, search_range, 1044 fn_ptr, ref_mv); 1045 if (thissme < INT_MAX) 1046 thissme = vp9_get_mvpred_var(x, &best_mv, ref_mv, fn_ptr, 1); 1047 if (thissme < bestsme) { 1048 bestsme = thissme; 1049 *dst_mv = best_mv; 1050 } 1051 } 1052 return bestsme; 1053} 1054 1055int vp9_full_search_sad_c(const MACROBLOCK *x, const MV *ref_mv, 1056 int sad_per_bit, int distance, 1057 const vp9_variance_fn_ptr_t *fn_ptr, 1058 const MV *center_mv, MV *best_mv) { 1059 int r, c; 1060 const MACROBLOCKD *const xd = &x->e_mbd; 1061 const struct buf_2d *const what = &x->plane[0].src; 1062 const struct buf_2d *const in_what = &xd->plane[0].pre[0]; 1063 const int row_min = MAX(ref_mv->row - distance, x->mv_row_min); 1064 const int row_max = MIN(ref_mv->row + distance, x->mv_row_max); 1065 const int col_min = MAX(ref_mv->col - distance, x->mv_col_min); 1066 const int col_max = MIN(ref_mv->col + distance, x->mv_col_max); 1067 const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; 1068 int best_sad = fn_ptr->sdf(what->buf, what->stride, 1069 get_buf_from_mv(in_what, ref_mv), in_what->stride) + 1070 mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit); 1071 *best_mv = *ref_mv; 1072 1073 for (r = row_min; r < row_max; ++r) { 1074 for (c = col_min; c < col_max; ++c) { 1075 const MV mv = {r, c}; 1076 const int sad = fn_ptr->sdf(what->buf, what->stride, 1077 get_buf_from_mv(in_what, &mv), in_what->stride) + 1078 mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit); 1079 if (sad < best_sad) { 1080 best_sad = sad; 1081 *best_mv = mv; 1082 } 1083 } 1084 } 1085 return best_sad; 1086} 1087 1088int vp9_full_search_sadx3(const MACROBLOCK *x, const MV *ref_mv, 1089 int sad_per_bit, int distance, 1090 const vp9_variance_fn_ptr_t *fn_ptr, 1091 const MV *center_mv, MV *best_mv) { 1092 int r; 1093 const MACROBLOCKD *const xd = &x->e_mbd; 1094 const struct buf_2d *const what = &x->plane[0].src; 1095 const struct buf_2d *const in_what = &xd->plane[0].pre[0]; 1096 const int row_min = MAX(ref_mv->row - distance, x->mv_row_min); 1097 const int row_max = MIN(ref_mv->row + distance, x->mv_row_max); 1098 const int col_min = MAX(ref_mv->col - distance, x->mv_col_min); 1099 const int col_max = MIN(ref_mv->col + distance, x->mv_col_max); 1100 const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; 1101 unsigned int best_sad = fn_ptr->sdf(what->buf, what->stride, 1102 get_buf_from_mv(in_what, ref_mv), in_what->stride) + 1103 mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit); 1104 *best_mv = *ref_mv; 1105 1106 for (r = row_min; r < row_max; ++r) { 1107 int c = col_min; 1108 const uint8_t *check_here = &in_what->buf[r * in_what->stride + c]; 1109 1110 if (fn_ptr->sdx3f != NULL) { 1111 while ((c + 2) < col_max) { 1112 int i; 1113 unsigned int sads[3]; 1114 1115 fn_ptr->sdx3f(what->buf, what->stride, check_here, in_what->stride, 1116 sads); 1117 1118 for (i = 0; i < 3; ++i) { 1119 unsigned int sad = sads[i]; 1120 if (sad < best_sad) { 1121 const MV mv = {r, c}; 1122 sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit); 1123 if (sad < best_sad) { 1124 best_sad = sad; 1125 *best_mv = mv; 1126 } 1127 } 1128 ++check_here; 1129 ++c; 1130 } 1131 } 1132 } 1133 1134 while (c < col_max) { 1135 unsigned int sad = fn_ptr->sdf(what->buf, what->stride, 1136 check_here, in_what->stride); 1137 if (sad < best_sad) { 1138 const MV mv = {r, c}; 1139 sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit); 1140 if (sad < best_sad) { 1141 best_sad = sad; 1142 *best_mv = mv; 1143 } 1144 } 1145 ++check_here; 1146 ++c; 1147 } 1148 } 1149 1150 return best_sad; 1151} 1152 1153int vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv, 1154 int sad_per_bit, int distance, 1155 const vp9_variance_fn_ptr_t *fn_ptr, 1156 const MV *center_mv, MV *best_mv) { 1157 int r; 1158 const MACROBLOCKD *const xd = &x->e_mbd; 1159 const struct buf_2d *const what = &x->plane[0].src; 1160 const struct buf_2d *const in_what = &xd->plane[0].pre[0]; 1161 const int row_min = MAX(ref_mv->row - distance, x->mv_row_min); 1162 const int row_max = MIN(ref_mv->row + distance, x->mv_row_max); 1163 const int col_min = MAX(ref_mv->col - distance, x->mv_col_min); 1164 const int col_max = MIN(ref_mv->col + distance, x->mv_col_max); 1165 const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; 1166 unsigned int best_sad = fn_ptr->sdf(what->buf, what->stride, 1167 get_buf_from_mv(in_what, ref_mv), in_what->stride) + 1168 mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit); 1169 *best_mv = *ref_mv; 1170 1171 for (r = row_min; r < row_max; ++r) { 1172 int c = col_min; 1173 const uint8_t *check_here = &in_what->buf[r * in_what->stride + c]; 1174 1175 if (fn_ptr->sdx8f != NULL) { 1176 while ((c + 7) < col_max) { 1177 int i; 1178 unsigned int sads[8]; 1179 1180 fn_ptr->sdx8f(what->buf, what->stride, check_here, in_what->stride, 1181 sads); 1182 1183 for (i = 0; i < 8; ++i) { 1184 unsigned int sad = sads[i]; 1185 if (sad < best_sad) { 1186 const MV mv = {r, c}; 1187 sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit); 1188 if (sad < best_sad) { 1189 best_sad = sad; 1190 *best_mv = mv; 1191 } 1192 } 1193 ++check_here; 1194 ++c; 1195 } 1196 } 1197 } 1198 1199 if (fn_ptr->sdx3f != NULL) { 1200 while ((c + 2) < col_max) { 1201 int i; 1202 unsigned int sads[3]; 1203 1204 fn_ptr->sdx3f(what->buf, what->stride, check_here, in_what->stride, 1205 sads); 1206 1207 for (i = 0; i < 3; ++i) { 1208 unsigned int sad = sads[i]; 1209 if (sad < best_sad) { 1210 const MV mv = {r, c}; 1211 sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit); 1212 if (sad < best_sad) { 1213 best_sad = sad; 1214 *best_mv = mv; 1215 } 1216 } 1217 ++check_here; 1218 ++c; 1219 } 1220 } 1221 } 1222 1223 while (c < col_max) { 1224 unsigned int sad = fn_ptr->sdf(what->buf, what->stride, 1225 check_here, in_what->stride); 1226 if (sad < best_sad) { 1227 const MV mv = {r, c}; 1228 sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit); 1229 if (sad < best_sad) { 1230 best_sad = sad; 1231 *best_mv = mv; 1232 } 1233 } 1234 ++check_here; 1235 ++c; 1236 } 1237 } 1238 1239 return best_sad; 1240} 1241 1242int vp9_refining_search_sad_c(const MACROBLOCK *x, 1243 MV *ref_mv, int error_per_bit, 1244 int search_range, 1245 const vp9_variance_fn_ptr_t *fn_ptr, 1246 const MV *center_mv) { 1247 const MACROBLOCKD *const xd = &x->e_mbd; 1248 const MV neighbors[4] = {{ -1, 0}, {0, -1}, {0, 1}, {1, 0}}; 1249 const struct buf_2d *const what = &x->plane[0].src; 1250 const struct buf_2d *const in_what = &xd->plane[0].pre[0]; 1251 const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; 1252 const uint8_t *best_address = get_buf_from_mv(in_what, ref_mv); 1253 unsigned int best_sad = fn_ptr->sdf(what->buf, what->stride, best_address, 1254 in_what->stride) + 1255 mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit); 1256 int i, j; 1257 1258 for (i = 0; i < search_range; i++) { 1259 int best_site = -1; 1260 const int all_in = ((ref_mv->row - 1) > x->mv_row_min) & 1261 ((ref_mv->row + 1) < x->mv_row_max) & 1262 ((ref_mv->col - 1) > x->mv_col_min) & 1263 ((ref_mv->col + 1) < x->mv_col_max); 1264 1265 if (all_in) { 1266 unsigned int sads[4]; 1267 const uint8_t *const positions[4] = { 1268 best_address - in_what->stride, 1269 best_address - 1, 1270 best_address + 1, 1271 best_address + in_what->stride 1272 }; 1273 1274 fn_ptr->sdx4df(what->buf, what->stride, positions, in_what->stride, sads); 1275 1276 for (j = 0; j < 4; ++j) { 1277 if (sads[j] < best_sad) { 1278 const MV mv = {ref_mv->row + neighbors[j].row, 1279 ref_mv->col + neighbors[j].col}; 1280 sads[j] += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit); 1281 if (sads[j] < best_sad) { 1282 best_sad = sads[j]; 1283 best_site = j; 1284 } 1285 } 1286 } 1287 } else { 1288 for (j = 0; j < 4; ++j) { 1289 const MV mv = {ref_mv->row + neighbors[j].row, 1290 ref_mv->col + neighbors[j].col}; 1291 1292 if (is_mv_in(x, &mv)) { 1293 unsigned int sad = fn_ptr->sdf(what->buf, what->stride, 1294 get_buf_from_mv(in_what, &mv), 1295 in_what->stride); 1296 if (sad < best_sad) { 1297 sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit); 1298 if (sad < best_sad) { 1299 best_sad = sad; 1300 best_site = j; 1301 } 1302 } 1303 } 1304 } 1305 } 1306 1307 if (best_site == -1) { 1308 break; 1309 } else { 1310 ref_mv->row += neighbors[best_site].row; 1311 ref_mv->col += neighbors[best_site].col; 1312 best_address = get_buf_from_mv(in_what, ref_mv); 1313 } 1314 } 1315 1316 return best_sad; 1317} 1318 1319// This function is called when we do joint motion search in comp_inter_inter 1320// mode. 1321int vp9_refining_search_8p_c(const MACROBLOCK *x, 1322 MV *ref_mv, int error_per_bit, 1323 int search_range, 1324 const vp9_variance_fn_ptr_t *fn_ptr, 1325 const MV *center_mv, 1326 const uint8_t *second_pred) { 1327 const MV neighbors[8] = {{-1, 0}, {0, -1}, {0, 1}, {1, 0}, 1328 {-1, -1}, {1, -1}, {-1, 1}, {1, 1}}; 1329 const MACROBLOCKD *const xd = &x->e_mbd; 1330 const struct buf_2d *const what = &x->plane[0].src; 1331 const struct buf_2d *const in_what = &xd->plane[0].pre[0]; 1332 const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; 1333 unsigned int best_sad = fn_ptr->sdaf(what->buf, what->stride, 1334 get_buf_from_mv(in_what, ref_mv), in_what->stride, second_pred) + 1335 mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit); 1336 int i, j; 1337 1338 for (i = 0; i < search_range; ++i) { 1339 int best_site = -1; 1340 1341 for (j = 0; j < 8; ++j) { 1342 const MV mv = {ref_mv->row + neighbors[j].row, 1343 ref_mv->col + neighbors[j].col}; 1344 1345 if (is_mv_in(x, &mv)) { 1346 unsigned int sad = fn_ptr->sdaf(what->buf, what->stride, 1347 get_buf_from_mv(in_what, &mv), in_what->stride, second_pred); 1348 if (sad < best_sad) { 1349 sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit); 1350 if (sad < best_sad) { 1351 best_sad = sad; 1352 best_site = j; 1353 } 1354 } 1355 } 1356 } 1357 1358 if (best_site == -1) { 1359 break; 1360 } else { 1361 ref_mv->row += neighbors[best_site].row; 1362 ref_mv->col += neighbors[best_site].col; 1363 } 1364 } 1365 return best_sad; 1366} 1367 1368int vp9_full_pixel_search(VP9_COMP *cpi, MACROBLOCK *x, 1369 BLOCK_SIZE bsize, MV *mvp_full, 1370 int step_param, int error_per_bit, 1371 const MV *ref_mv, MV *tmp_mv, 1372 int var_max, int rd) { 1373 const SPEED_FEATURES *const sf = &cpi->sf; 1374 const SEARCH_METHODS method = sf->mv.search_method; 1375 vp9_variance_fn_ptr_t *fn_ptr = &cpi->fn_ptr[bsize]; 1376 int var = 0; 1377 1378 switch (method) { 1379 case FAST_DIAMOND: 1380 var = vp9_fast_dia_search(x, mvp_full, step_param, error_per_bit, 0, 1381 fn_ptr, 1, ref_mv, tmp_mv); 1382 break; 1383 case FAST_HEX: 1384 var = vp9_fast_hex_search(x, mvp_full, step_param, error_per_bit, 0, 1385 fn_ptr, 1, ref_mv, tmp_mv); 1386 break; 1387 case HEX: 1388 var = vp9_hex_search(x, mvp_full, step_param, error_per_bit, 1, 1389 fn_ptr, 1, ref_mv, tmp_mv); 1390 break; 1391 case SQUARE: 1392 var = vp9_square_search(x, mvp_full, step_param, error_per_bit, 1, 1393 fn_ptr, 1, ref_mv, tmp_mv); 1394 break; 1395 case BIGDIA: 1396 var = vp9_bigdia_search(x, mvp_full, step_param, error_per_bit, 1, 1397 fn_ptr, 1, ref_mv, tmp_mv); 1398 break; 1399 case NSTEP: 1400 var = vp9_full_pixel_diamond(cpi, x, mvp_full, step_param, error_per_bit, 1401 MAX_MVSEARCH_STEPS - 1 - step_param, 1402 1, fn_ptr, ref_mv, tmp_mv); 1403 break; 1404 default: 1405 assert(!"Invalid search method."); 1406 } 1407 1408 if (method != NSTEP && rd && var < var_max) 1409 var = vp9_get_mvpred_var(x, tmp_mv, ref_mv, fn_ptr, 1); 1410 1411 return var; 1412} 1413