1/* 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11#include <limits.h> 12#include <math.h> 13#include <stdio.h> 14 15#include "./vpx_config.h" 16 17#include "vpx_mem/vpx_mem.h" 18 19#include "vp9/common/vp9_common.h" 20 21#include "vp9/encoder/vp9_onyx_int.h" 22#include "vp9/encoder/vp9_mcomp.h" 23 24// #define NEW_DIAMOND_SEARCH 25 26static INLINE const uint8_t *get_buf_from_mv(const struct buf_2d *buf, 27 const MV *mv) { 28 return &buf->buf[mv->row * buf->stride + mv->col]; 29} 30 31void vp9_set_mv_search_range(MACROBLOCK *x, const MV *mv) { 32 int col_min = (mv->col >> 3) - MAX_FULL_PEL_VAL + (mv->col & 7 ? 1 : 0); 33 int row_min = (mv->row >> 3) - MAX_FULL_PEL_VAL + (mv->row & 7 ? 1 : 0); 34 int col_max = (mv->col >> 3) + MAX_FULL_PEL_VAL; 35 int row_max = (mv->row >> 3) + MAX_FULL_PEL_VAL; 36 37 col_min = MAX(col_min, (MV_LOW >> 3) + 1); 38 row_min = MAX(row_min, (MV_LOW >> 3) + 1); 39 col_max = MIN(col_max, (MV_UPP >> 3) - 1); 40 row_max = MIN(row_max, (MV_UPP >> 3) - 1); 41 42 // Get intersection of UMV window and valid MV window to reduce # of checks 43 // in diamond search. 44 if (x->mv_col_min < col_min) 45 x->mv_col_min = col_min; 46 if (x->mv_col_max > col_max) 47 x->mv_col_max = col_max; 48 if (x->mv_row_min < row_min) 49 x->mv_row_min = row_min; 50 if (x->mv_row_max > row_max) 51 x->mv_row_max = row_max; 52} 53 54int vp9_init_search_range(VP9_COMP *cpi, int size) { 55 int sr = 0; 56 57 // Minimum search size no matter what the passed in value. 58 size = MAX(16, size); 59 60 while ((size << sr) < MAX_FULL_PEL_VAL) 61 sr++; 62 63 sr += cpi->sf.reduce_first_step_size; 64 sr = MIN(sr, (cpi->sf.max_step_search_steps - 2)); 65 return sr; 66} 67 68static INLINE int mv_cost(const MV *mv, 69 const int *joint_cost, int *comp_cost[2]) { 70 return joint_cost[vp9_get_mv_joint(mv)] + 71 comp_cost[0][mv->row] + comp_cost[1][mv->col]; 72} 73 74int vp9_mv_bit_cost(const MV *mv, const MV *ref, 75 const int *mvjcost, int *mvcost[2], int weight) { 76 const MV diff = { mv->row - ref->row, 77 mv->col - ref->col }; 78 return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjcost, mvcost) * weight, 7); 79} 80 81static int mv_err_cost(const MV *mv, const MV *ref, 82 const int *mvjcost, int *mvcost[2], 83 int error_per_bit) { 84 if (mvcost) { 85 const MV diff = { mv->row - ref->row, 86 mv->col - ref->col }; 87 return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjcost, mvcost) * 88 error_per_bit, 13); 89 } 90 return 0; 91} 92 93static int mvsad_err_cost(const MV *mv, const MV *ref, 94 const int *mvjsadcost, int *mvsadcost[2], 95 int error_per_bit) { 96 if (mvsadcost) { 97 const MV diff = { mv->row - ref->row, 98 mv->col - ref->col }; 99 return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjsadcost, mvsadcost) * 100 error_per_bit, 8); 101 } 102 return 0; 103} 104 105void vp9_init_dsmotion_compensation(MACROBLOCK *x, int stride) { 106 int len, ss_count = 1; 107 108 x->ss[0].mv.col = x->ss[0].mv.row = 0; 109 x->ss[0].offset = 0; 110 111 for (len = MAX_FIRST_STEP; len > 0; len /= 2) { 112 // Generate offsets for 4 search sites per step. 113 const MV ss_mvs[] = {{-len, 0}, {len, 0}, {0, -len}, {0, len}}; 114 int i; 115 for (i = 0; i < 4; ++i) { 116 search_site *const ss = &x->ss[ss_count++]; 117 ss->mv = ss_mvs[i]; 118 ss->offset = ss->mv.row * stride + ss->mv.col; 119 } 120 } 121 122 x->ss_count = ss_count; 123 x->searches_per_step = 4; 124} 125 126void vp9_init3smotion_compensation(MACROBLOCK *x, int stride) { 127 int len, ss_count = 1; 128 129 x->ss[0].mv.col = x->ss[0].mv.row = 0; 130 x->ss[0].offset = 0; 131 132 for (len = MAX_FIRST_STEP; len > 0; len /= 2) { 133 // Generate offsets for 8 search sites per step. 134 const MV ss_mvs[8] = { 135 {-len, 0 }, {len, 0 }, { 0, -len}, {0, len}, 136 {-len, -len}, {-len, len}, {len, -len}, {len, len} 137 }; 138 int i; 139 for (i = 0; i < 8; ++i) { 140 search_site *const ss = &x->ss[ss_count++]; 141 ss->mv = ss_mvs[i]; 142 ss->offset = ss->mv.row * stride + ss->mv.col; 143 } 144 } 145 146 x->ss_count = ss_count; 147 x->searches_per_step = 8; 148} 149 150/* 151 * To avoid the penalty for crossing cache-line read, preload the reference 152 * area in a small buffer, which is aligned to make sure there won't be crossing 153 * cache-line read while reading from this buffer. This reduced the cpu 154 * cycles spent on reading ref data in sub-pixel filter functions. 155 * TODO: Currently, since sub-pixel search range here is -3 ~ 3, copy 22 rows x 156 * 32 cols area that is enough for 16x16 macroblock. Later, for SPLITMV, we 157 * could reduce the area. 158 */ 159 160/* estimated cost of a motion vector (r,c) */ 161#define MVC(r, c) \ 162 (mvcost ? \ 163 ((mvjcost[((r) != rr) * 2 + ((c) != rc)] + \ 164 mvcost[0][((r) - rr)] + mvcost[1][((c) - rc)]) * \ 165 error_per_bit + 4096) >> 13 : 0) 166 167 168// convert motion vector component to offset for svf calc 169static INLINE int sp(int x) { 170 return (x & 7) << 1; 171} 172 173static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c, 174 int offset) { 175 return &buf[(r >> 3) * stride + (c >> 3) - offset]; 176} 177 178/* returns subpixel variance error function */ 179#define DIST(r, c) \ 180 vfp->svf(pre(y, y_stride, r, c, offset), y_stride, sp(c), sp(r), z, \ 181 src_stride, &sse) 182 183/* checks if (r, c) has better score than previous best */ 184#define CHECK_BETTER(v, r, c) \ 185 if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \ 186 thismse = (DIST(r, c)); \ 187 if ((v = MVC(r, c) + thismse) < besterr) { \ 188 besterr = v; \ 189 br = r; \ 190 bc = c; \ 191 *distortion = thismse; \ 192 *sse1 = sse; \ 193 } \ 194 } else { \ 195 v = INT_MAX; \ 196 } 197 198#define FIRST_LEVEL_CHECKS \ 199 { \ 200 unsigned int left, right, up, down, diag; \ 201 CHECK_BETTER(left, tr, tc - hstep); \ 202 CHECK_BETTER(right, tr, tc + hstep); \ 203 CHECK_BETTER(up, tr - hstep, tc); \ 204 CHECK_BETTER(down, tr + hstep, tc); \ 205 whichdir = (left < right ? 0 : 1) + \ 206 (up < down ? 0 : 2); \ 207 switch (whichdir) { \ 208 case 0: \ 209 CHECK_BETTER(diag, tr - hstep, tc - hstep); \ 210 break; \ 211 case 1: \ 212 CHECK_BETTER(diag, tr - hstep, tc + hstep); \ 213 break; \ 214 case 2: \ 215 CHECK_BETTER(diag, tr + hstep, tc - hstep); \ 216 break; \ 217 case 3: \ 218 CHECK_BETTER(diag, tr + hstep, tc + hstep); \ 219 break; \ 220 } \ 221 } 222 223#define SECOND_LEVEL_CHECKS \ 224 { \ 225 int kr, kc; \ 226 unsigned int second; \ 227 if (tr != br && tc != bc) { \ 228 kr = br - tr; \ 229 kc = bc - tc; \ 230 CHECK_BETTER(second, tr + kr, tc + 2 * kc); \ 231 CHECK_BETTER(second, tr + 2 * kr, tc + kc); \ 232 } else if (tr == br && tc != bc) { \ 233 kc = bc - tc; \ 234 CHECK_BETTER(second, tr + hstep, tc + 2 * kc); \ 235 CHECK_BETTER(second, tr - hstep, tc + 2 * kc); \ 236 switch (whichdir) { \ 237 case 0: \ 238 case 1: \ 239 CHECK_BETTER(second, tr + hstep, tc + kc); \ 240 break; \ 241 case 2: \ 242 case 3: \ 243 CHECK_BETTER(second, tr - hstep, tc + kc); \ 244 break; \ 245 } \ 246 } else if (tr != br && tc == bc) { \ 247 kr = br - tr; \ 248 CHECK_BETTER(second, tr + 2 * kr, tc + hstep); \ 249 CHECK_BETTER(second, tr + 2 * kr, tc - hstep); \ 250 switch (whichdir) { \ 251 case 0: \ 252 case 2: \ 253 CHECK_BETTER(second, tr + kr, tc + hstep); \ 254 break; \ 255 case 1: \ 256 case 3: \ 257 CHECK_BETTER(second, tr + kr, tc - hstep); \ 258 break; \ 259 } \ 260 } \ 261 } 262 263int vp9_find_best_sub_pixel_tree(const MACROBLOCK *x, 264 MV *bestmv, const MV *ref_mv, 265 int allow_hp, 266 int error_per_bit, 267 const vp9_variance_fn_ptr_t *vfp, 268 int forced_stop, 269 int iters_per_step, 270 int *mvjcost, int *mvcost[2], 271 int *distortion, 272 unsigned int *sse1) { 273 const uint8_t *z = x->plane[0].src.buf; 274 const int src_stride = x->plane[0].src.stride; 275 const MACROBLOCKD *xd = &x->e_mbd; 276 unsigned int besterr = INT_MAX; 277 unsigned int sse; 278 unsigned int whichdir; 279 int thismse; 280 unsigned int halfiters = iters_per_step; 281 unsigned int quarteriters = iters_per_step; 282 unsigned int eighthiters = iters_per_step; 283 284 const int y_stride = xd->plane[0].pre[0].stride; 285 const int offset = bestmv->row * y_stride + bestmv->col; 286 const uint8_t *y = xd->plane[0].pre[0].buf + offset; 287 288 int rr = ref_mv->row; 289 int rc = ref_mv->col; 290 int br = bestmv->row * 8; 291 int bc = bestmv->col * 8; 292 int hstep = 4; 293 const int minc = MAX(x->mv_col_min * 8, ref_mv->col - MV_MAX); 294 const int maxc = MIN(x->mv_col_max * 8, ref_mv->col + MV_MAX); 295 const int minr = MAX(x->mv_row_min * 8, ref_mv->row - MV_MAX); 296 const int maxr = MIN(x->mv_row_max * 8, ref_mv->row + MV_MAX); 297 298 int tr = br; 299 int tc = bc; 300 301 // central mv 302 bestmv->row *= 8; 303 bestmv->col *= 8; 304 305 // calculate central point error 306 besterr = vfp->vf(y, y_stride, z, src_stride, sse1); 307 *distortion = besterr; 308 besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit); 309 310 // 1/2 pel 311 FIRST_LEVEL_CHECKS; 312 if (halfiters > 1) { 313 SECOND_LEVEL_CHECKS; 314 } 315 tr = br; 316 tc = bc; 317 318 // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only 319 if (forced_stop != 2) { 320 hstep >>= 1; 321 FIRST_LEVEL_CHECKS; 322 if (quarteriters > 1) { 323 SECOND_LEVEL_CHECKS; 324 } 325 tr = br; 326 tc = bc; 327 } 328 329 if (allow_hp && vp9_use_mv_hp(ref_mv) && forced_stop == 0) { 330 hstep >>= 1; 331 FIRST_LEVEL_CHECKS; 332 if (eighthiters > 1) { 333 SECOND_LEVEL_CHECKS; 334 } 335 tr = br; 336 tc = bc; 337 } 338 // These lines insure static analysis doesn't warn that 339 // tr and tc aren't used after the above point. 340 (void) tr; 341 (void) tc; 342 343 bestmv->row = br; 344 bestmv->col = bc; 345 346 if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) || 347 (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3))) 348 return INT_MAX; 349 350 return besterr; 351} 352 353#undef DIST 354/* returns subpixel variance error function */ 355#define DIST(r, c) \ 356 vfp->svaf(pre(y, y_stride, r, c, offset), y_stride, sp(c), sp(r), \ 357 z, src_stride, &sse, second_pred) 358 359int vp9_find_best_sub_pixel_comp_tree(const MACROBLOCK *x, 360 MV *bestmv, const MV *ref_mv, 361 int allow_hp, 362 int error_per_bit, 363 const vp9_variance_fn_ptr_t *vfp, 364 int forced_stop, 365 int iters_per_step, 366 int *mvjcost, int *mvcost[2], 367 int *distortion, 368 unsigned int *sse1, 369 const uint8_t *second_pred, 370 int w, int h) { 371 const uint8_t *z = x->plane[0].src.buf; 372 const int src_stride = x->plane[0].src.stride; 373 const MACROBLOCKD *xd = &x->e_mbd; 374 unsigned int besterr = INT_MAX; 375 unsigned int sse; 376 unsigned int whichdir; 377 int thismse; 378 const unsigned int halfiters = iters_per_step; 379 const unsigned int quarteriters = iters_per_step; 380 const unsigned int eighthiters = iters_per_step; 381 382 DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64); 383 const int y_stride = xd->plane[0].pre[0].stride; 384 const int offset = bestmv->row * y_stride + bestmv->col; 385 const uint8_t *y = xd->plane[0].pre[0].buf + offset; 386 387 int rr = ref_mv->row; 388 int rc = ref_mv->col; 389 int br = bestmv->row * 8; 390 int bc = bestmv->col * 8; 391 int hstep = 4; 392 const int minc = MAX(x->mv_col_min * 8, ref_mv->col - MV_MAX); 393 const int maxc = MIN(x->mv_col_max * 8, ref_mv->col + MV_MAX); 394 const int minr = MAX(x->mv_row_min * 8, ref_mv->row - MV_MAX); 395 const int maxr = MIN(x->mv_row_max * 8, ref_mv->row + MV_MAX); 396 397 int tr = br; 398 int tc = bc; 399 400 // central mv 401 bestmv->row *= 8; 402 bestmv->col *= 8; 403 404 // calculate central point error 405 // TODO(yunqingwang): central pointer error was already calculated in full- 406 // pixel search, and can be passed in this function. 407 vp9_comp_avg_pred(comp_pred, second_pred, w, h, y, y_stride); 408 besterr = vfp->vf(comp_pred, w, z, src_stride, sse1); 409 *distortion = besterr; 410 besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit); 411 412 // Each subsequent iteration checks at least one point in 413 // common with the last iteration could be 2 ( if diag selected) 414 // 1/2 pel 415 FIRST_LEVEL_CHECKS; 416 if (halfiters > 1) { 417 SECOND_LEVEL_CHECKS; 418 } 419 tr = br; 420 tc = bc; 421 422 // Each subsequent iteration checks at least one point in common with 423 // the last iteration could be 2 ( if diag selected) 1/4 pel 424 425 // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only 426 if (forced_stop != 2) { 427 hstep >>= 1; 428 FIRST_LEVEL_CHECKS; 429 if (quarteriters > 1) { 430 SECOND_LEVEL_CHECKS; 431 } 432 tr = br; 433 tc = bc; 434 } 435 436 if (allow_hp && vp9_use_mv_hp(ref_mv) && forced_stop == 0) { 437 hstep >>= 1; 438 FIRST_LEVEL_CHECKS; 439 if (eighthiters > 1) { 440 SECOND_LEVEL_CHECKS; 441 } 442 tr = br; 443 tc = bc; 444 } 445 // These lines insure static analysis doesn't warn that 446 // tr and tc aren't used after the above point. 447 (void) tr; 448 (void) tc; 449 450 bestmv->row = br; 451 bestmv->col = bc; 452 453 if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) || 454 (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3))) 455 return INT_MAX; 456 457 return besterr; 458} 459 460#undef MVC 461#undef PRE 462#undef DIST 463#undef CHECK_BETTER 464 465static INLINE int check_bounds(const MACROBLOCK *x, int row, int col, 466 int range) { 467 return ((row - range) >= x->mv_row_min) & 468 ((row + range) <= x->mv_row_max) & 469 ((col - range) >= x->mv_col_min) & 470 ((col + range) <= x->mv_col_max); 471} 472 473static INLINE int is_mv_in(const MACROBLOCK *x, const MV *mv) { 474 return (mv->col >= x->mv_col_min) && (mv->col <= x->mv_col_max) && 475 (mv->row >= x->mv_row_min) && (mv->row <= x->mv_row_max); 476} 477 478#define CHECK_BETTER \ 479 {\ 480 if (thissad < bestsad) {\ 481 if (use_mvcost) \ 482 thissad += mvsad_err_cost(&this_mv, &fcenter_mv, \ 483 mvjsadcost, mvsadcost, sad_per_bit);\ 484 if (thissad < bestsad) {\ 485 bestsad = thissad;\ 486 best_site = i;\ 487 }\ 488 }\ 489 } 490 491#define MAX_PATTERN_SCALES 11 492#define MAX_PATTERN_CANDIDATES 8 // max number of canddiates per scale 493#define PATTERN_CANDIDATES_REF 3 // number of refinement candidates 494 495// Generic pattern search function that searches over multiple scales. 496// Each scale can have a different number of candidates and shape of 497// candidates as indicated in the num_candidates and candidates arrays 498// passed into this function 499static int vp9_pattern_search(const MACROBLOCK *x, 500 MV *ref_mv, 501 int search_param, 502 int sad_per_bit, 503 int do_init_search, int do_refine, 504 const vp9_variance_fn_ptr_t *vfp, 505 int use_mvcost, 506 const MV *center_mv, MV *best_mv, 507 const int num_candidates[MAX_PATTERN_SCALES], 508 const MV candidates[MAX_PATTERN_SCALES] 509 [MAX_PATTERN_CANDIDATES]) { 510 const MACROBLOCKD *const xd = &x->e_mbd; 511 static const int search_param_to_steps[MAX_MVSEARCH_STEPS] = { 512 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 513 }; 514 int i, j, s, t; 515 const struct buf_2d *const what = &x->plane[0].src; 516 const struct buf_2d *const in_what = &xd->plane[0].pre[0]; 517 int br, bc; 518 int bestsad = INT_MAX; 519 int thissad; 520 int k = -1; 521 const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; 522 int best_init_s = search_param_to_steps[search_param]; 523 const int *const mvjsadcost = x->nmvjointsadcost; 524 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; 525 526 // adjust ref_mv to make sure it is within MV range 527 clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); 528 br = ref_mv->row; 529 bc = ref_mv->col; 530 531 // Work out the start point for the search 532 bestsad = vfp->sdf(what->buf, what->stride, 533 get_buf_from_mv(in_what, ref_mv), in_what->stride, 534 0x7fffffff) + mvsad_err_cost(ref_mv, &fcenter_mv, 535 mvjsadcost, mvsadcost, sad_per_bit); 536 537 // Search all possible scales upto the search param around the center point 538 // pick the scale of the point that is best as the starting scale of 539 // further steps around it. 540 if (do_init_search) { 541 s = best_init_s; 542 best_init_s = -1; 543 for (t = 0; t <= s; ++t) { 544 int best_site = -1; 545 if (check_bounds(x, br, bc, 1 << t)) { 546 for (i = 0; i < num_candidates[t]; i++) { 547 const MV this_mv = {br + candidates[t][i].row, 548 bc + candidates[t][i].col}; 549 thissad = vfp->sdf(what->buf, what->stride, 550 get_buf_from_mv(in_what, &this_mv), 551 in_what->stride, bestsad); 552 CHECK_BETTER 553 } 554 } else { 555 for (i = 0; i < num_candidates[t]; i++) { 556 const MV this_mv = {br + candidates[t][i].row, 557 bc + candidates[t][i].col}; 558 if (!is_mv_in(x, &this_mv)) 559 continue; 560 thissad = vfp->sdf(what->buf, what->stride, 561 get_buf_from_mv(in_what, &this_mv), 562 in_what->stride, bestsad); 563 CHECK_BETTER 564 } 565 } 566 if (best_site == -1) { 567 continue; 568 } else { 569 best_init_s = t; 570 k = best_site; 571 } 572 } 573 if (best_init_s != -1) { 574 br += candidates[best_init_s][k].row; 575 bc += candidates[best_init_s][k].col; 576 } 577 } 578 579 // If the center point is still the best, just skip this and move to 580 // the refinement step. 581 if (best_init_s != -1) { 582 int best_site = -1; 583 s = best_init_s; 584 585 do { 586 // No need to search all 6 points the 1st time if initial search was used 587 if (!do_init_search || s != best_init_s) { 588 if (check_bounds(x, br, bc, 1 << s)) { 589 for (i = 0; i < num_candidates[s]; i++) { 590 const MV this_mv = {br + candidates[s][i].row, 591 bc + candidates[s][i].col}; 592 thissad = vfp->sdf(what->buf, what->stride, 593 get_buf_from_mv(in_what, &this_mv), 594 in_what->stride, bestsad); 595 CHECK_BETTER 596 } 597 } else { 598 for (i = 0; i < num_candidates[s]; i++) { 599 const MV this_mv = {br + candidates[s][i].row, 600 bc + candidates[s][i].col}; 601 if (!is_mv_in(x, &this_mv)) 602 continue; 603 thissad = vfp->sdf(what->buf, what->stride, 604 get_buf_from_mv(in_what, &this_mv), 605 in_what->stride, bestsad); 606 CHECK_BETTER 607 } 608 } 609 610 if (best_site == -1) { 611 continue; 612 } else { 613 br += candidates[s][best_site].row; 614 bc += candidates[s][best_site].col; 615 k = best_site; 616 } 617 } 618 619 do { 620 int next_chkpts_indices[PATTERN_CANDIDATES_REF]; 621 best_site = -1; 622 next_chkpts_indices[0] = (k == 0) ? num_candidates[s] - 1 : k - 1; 623 next_chkpts_indices[1] = k; 624 next_chkpts_indices[2] = (k == num_candidates[s] - 1) ? 0 : k + 1; 625 626 if (check_bounds(x, br, bc, 1 << s)) { 627 for (i = 0; i < PATTERN_CANDIDATES_REF; i++) { 628 const MV this_mv = {br + candidates[s][next_chkpts_indices[i]].row, 629 bc + candidates[s][next_chkpts_indices[i]].col}; 630 thissad = vfp->sdf(what->buf, what->stride, 631 get_buf_from_mv(in_what, &this_mv), 632 in_what->stride, bestsad); 633 CHECK_BETTER 634 } 635 } else { 636 for (i = 0; i < PATTERN_CANDIDATES_REF; i++) { 637 const MV this_mv = {br + candidates[s][next_chkpts_indices[i]].row, 638 bc + candidates[s][next_chkpts_indices[i]].col}; 639 if (!is_mv_in(x, &this_mv)) 640 continue; 641 thissad = vfp->sdf(what->buf, what->stride, 642 get_buf_from_mv(in_what, &this_mv), 643 in_what->stride, bestsad); 644 CHECK_BETTER 645 } 646 } 647 648 if (best_site != -1) { 649 k = next_chkpts_indices[best_site]; 650 br += candidates[s][k].row; 651 bc += candidates[s][k].col; 652 } 653 } while (best_site != -1); 654 } while (s--); 655 } 656 657 // Check 4 1-away neighbors if do_refine is true. 658 // For most well-designed schemes do_refine will not be necessary. 659 if (do_refine) { 660 static const MV neighbors[4] = {{0, -1}, { -1, 0}, {1, 0}, {0, 1}}; 661 662 for (j = 0; j < 16; j++) { 663 int best_site = -1; 664 if (check_bounds(x, br, bc, 1)) { 665 for (i = 0; i < 4; i++) { 666 const MV this_mv = {br + neighbors[i].row, 667 bc + neighbors[i].col}; 668 thissad = vfp->sdf(what->buf, what->stride, 669 get_buf_from_mv(in_what, &this_mv), 670 in_what->stride, bestsad); 671 CHECK_BETTER 672 } 673 } else { 674 for (i = 0; i < 4; i++) { 675 const MV this_mv = {br + neighbors[i].row, 676 bc + neighbors[i].col}; 677 if (!is_mv_in(x, &this_mv)) 678 continue; 679 thissad = vfp->sdf(what->buf, what->stride, 680 get_buf_from_mv(in_what, &this_mv), 681 in_what->stride, bestsad); 682 CHECK_BETTER 683 } 684 } 685 686 if (best_site == -1) { 687 break; 688 } else { 689 br += neighbors[best_site].row; 690 bc += neighbors[best_site].col; 691 } 692 } 693 } 694 695 best_mv->row = br; 696 best_mv->col = bc; 697 698 return bestsad; 699} 700 701int vp9_get_mvpred_var(const MACROBLOCK *x, 702 const MV *best_mv, const MV *center_mv, 703 const vp9_variance_fn_ptr_t *vfp, 704 int use_mvcost) { 705 const MACROBLOCKD *const xd = &x->e_mbd; 706 const struct buf_2d *const what = &x->plane[0].src; 707 const struct buf_2d *const in_what = &xd->plane[0].pre[0]; 708 const MV mv = {best_mv->row * 8, best_mv->col * 8}; 709 unsigned int unused; 710 711 return vfp->vf(what->buf, what->stride, 712 get_buf_from_mv(in_what, best_mv), in_what->stride, &unused) + 713 (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost, 714 x->mvcost, x->errorperbit) : 0); 715} 716 717int vp9_get_mvpred_av_var(const MACROBLOCK *x, 718 const MV *best_mv, const MV *center_mv, 719 const uint8_t *second_pred, 720 const vp9_variance_fn_ptr_t *vfp, 721 int use_mvcost) { 722 const MACROBLOCKD *const xd = &x->e_mbd; 723 const struct buf_2d *const what = &x->plane[0].src; 724 const struct buf_2d *const in_what = &xd->plane[0].pre[0]; 725 const MV mv = {best_mv->row * 8, best_mv->col * 8}; 726 unsigned int unused; 727 728 return vfp->svaf(get_buf_from_mv(in_what, best_mv), in_what->stride, 0, 0, 729 what->buf, what->stride, &unused, second_pred) + 730 (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost, 731 x->mvcost, x->errorperbit) : 0); 732} 733 734int vp9_hex_search(const MACROBLOCK *x, 735 MV *ref_mv, 736 int search_param, 737 int sad_per_bit, 738 int do_init_search, 739 const vp9_variance_fn_ptr_t *vfp, 740 int use_mvcost, 741 const MV *center_mv, MV *best_mv) { 742 // First scale has 8-closest points, the rest have 6 points in hex shape 743 // at increasing scales 744 static const int hex_num_candidates[MAX_PATTERN_SCALES] = { 745 8, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6 746 }; 747 // Note that the largest candidate step at each scale is 2^scale 748 static const MV hex_candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES] = { 749 {{-1, -1}, {0, -1}, {1, -1}, {1, 0}, {1, 1}, { 0, 1}, { -1, 1}, {-1, 0}}, 750 {{-1, -2}, {1, -2}, {2, 0}, {1, 2}, { -1, 2}, { -2, 0}}, 751 {{-2, -4}, {2, -4}, {4, 0}, {2, 4}, { -2, 4}, { -4, 0}}, 752 {{-4, -8}, {4, -8}, {8, 0}, {4, 8}, { -4, 8}, { -8, 0}}, 753 {{-8, -16}, {8, -16}, {16, 0}, {8, 16}, { -8, 16}, { -16, 0}}, 754 {{-16, -32}, {16, -32}, {32, 0}, {16, 32}, { -16, 32}, { -32, 0}}, 755 {{-32, -64}, {32, -64}, {64, 0}, {32, 64}, { -32, 64}, { -64, 0}}, 756 {{-64, -128}, {64, -128}, {128, 0}, {64, 128}, { -64, 128}, { -128, 0}}, 757 {{-128, -256}, {128, -256}, {256, 0}, {128, 256}, { -128, 256}, { -256, 0}}, 758 {{-256, -512}, {256, -512}, {512, 0}, {256, 512}, { -256, 512}, { -512, 0}}, 759 {{-512, -1024}, {512, -1024}, {1024, 0}, {512, 1024}, { -512, 1024}, 760 { -1024, 0}}, 761 }; 762 return vp9_pattern_search(x, ref_mv, search_param, sad_per_bit, 763 do_init_search, 0, vfp, use_mvcost, 764 center_mv, best_mv, 765 hex_num_candidates, hex_candidates); 766} 767 768int vp9_bigdia_search(const MACROBLOCK *x, 769 MV *ref_mv, 770 int search_param, 771 int sad_per_bit, 772 int do_init_search, 773 const vp9_variance_fn_ptr_t *vfp, 774 int use_mvcost, 775 const MV *center_mv, 776 MV *best_mv) { 777 // First scale has 4-closest points, the rest have 8 points in diamond 778 // shape at increasing scales 779 static const int bigdia_num_candidates[MAX_PATTERN_SCALES] = { 780 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 781 }; 782 // Note that the largest candidate step at each scale is 2^scale 783 static const MV bigdia_candidates[MAX_PATTERN_SCALES] 784 [MAX_PATTERN_CANDIDATES] = { 785 {{0, -1}, {1, 0}, { 0, 1}, {-1, 0}}, 786 {{-1, -1}, {0, -2}, {1, -1}, {2, 0}, {1, 1}, {0, 2}, {-1, 1}, {-2, 0}}, 787 {{-2, -2}, {0, -4}, {2, -2}, {4, 0}, {2, 2}, {0, 4}, {-2, 2}, {-4, 0}}, 788 {{-4, -4}, {0, -8}, {4, -4}, {8, 0}, {4, 4}, {0, 8}, {-4, 4}, {-8, 0}}, 789 {{-8, -8}, {0, -16}, {8, -8}, {16, 0}, {8, 8}, {0, 16}, {-8, 8}, {-16, 0}}, 790 {{-16, -16}, {0, -32}, {16, -16}, {32, 0}, {16, 16}, {0, 32}, 791 {-16, 16}, {-32, 0}}, 792 {{-32, -32}, {0, -64}, {32, -32}, {64, 0}, {32, 32}, {0, 64}, 793 {-32, 32}, {-64, 0}}, 794 {{-64, -64}, {0, -128}, {64, -64}, {128, 0}, {64, 64}, {0, 128}, 795 {-64, 64}, {-128, 0}}, 796 {{-128, -128}, {0, -256}, {128, -128}, {256, 0}, {128, 128}, {0, 256}, 797 {-128, 128}, {-256, 0}}, 798 {{-256, -256}, {0, -512}, {256, -256}, {512, 0}, {256, 256}, {0, 512}, 799 {-256, 256}, {-512, 0}}, 800 {{-512, -512}, {0, -1024}, {512, -512}, {1024, 0}, {512, 512}, {0, 1024}, 801 {-512, 512}, {-1024, 0}}, 802 }; 803 return vp9_pattern_search(x, ref_mv, search_param, sad_per_bit, 804 do_init_search, 0, vfp, use_mvcost, 805 center_mv, best_mv, 806 bigdia_num_candidates, bigdia_candidates); 807} 808 809int vp9_square_search(const MACROBLOCK *x, 810 MV *ref_mv, 811 int search_param, 812 int sad_per_bit, 813 int do_init_search, 814 const vp9_variance_fn_ptr_t *vfp, 815 int use_mvcost, 816 const MV *center_mv, 817 MV *best_mv) { 818 // All scales have 8 closest points in square shape 819 static const int square_num_candidates[MAX_PATTERN_SCALES] = { 820 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 821 }; 822 // Note that the largest candidate step at each scale is 2^scale 823 static const MV square_candidates[MAX_PATTERN_SCALES] 824 [MAX_PATTERN_CANDIDATES] = { 825 {{-1, -1}, {0, -1}, {1, -1}, {1, 0}, {1, 1}, {0, 1}, {-1, 1}, {-1, 0}}, 826 {{-2, -2}, {0, -2}, {2, -2}, {2, 0}, {2, 2}, {0, 2}, {-2, 2}, {-2, 0}}, 827 {{-4, -4}, {0, -4}, {4, -4}, {4, 0}, {4, 4}, {0, 4}, {-4, 4}, {-4, 0}}, 828 {{-8, -8}, {0, -8}, {8, -8}, {8, 0}, {8, 8}, {0, 8}, {-8, 8}, {-8, 0}}, 829 {{-16, -16}, {0, -16}, {16, -16}, {16, 0}, {16, 16}, {0, 16}, 830 {-16, 16}, {-16, 0}}, 831 {{-32, -32}, {0, -32}, {32, -32}, {32, 0}, {32, 32}, {0, 32}, 832 {-32, 32}, {-32, 0}}, 833 {{-64, -64}, {0, -64}, {64, -64}, {64, 0}, {64, 64}, {0, 64}, 834 {-64, 64}, {-64, 0}}, 835 {{-128, -128}, {0, -128}, {128, -128}, {128, 0}, {128, 128}, {0, 128}, 836 {-128, 128}, {-128, 0}}, 837 {{-256, -256}, {0, -256}, {256, -256}, {256, 0}, {256, 256}, {0, 256}, 838 {-256, 256}, {-256, 0}}, 839 {{-512, -512}, {0, -512}, {512, -512}, {512, 0}, {512, 512}, {0, 512}, 840 {-512, 512}, {-512, 0}}, 841 {{-1024, -1024}, {0, -1024}, {1024, -1024}, {1024, 0}, {1024, 1024}, 842 {0, 1024}, {-1024, 1024}, {-1024, 0}}, 843 }; 844 return vp9_pattern_search(x, ref_mv, search_param, sad_per_bit, 845 do_init_search, 0, vfp, use_mvcost, 846 center_mv, best_mv, 847 square_num_candidates, square_candidates); 848} 849 850int vp9_fast_hex_search(const MACROBLOCK *x, 851 MV *ref_mv, 852 int search_param, 853 int sad_per_bit, 854 int do_init_search, // must be zero for fast_hex 855 const vp9_variance_fn_ptr_t *vfp, 856 int use_mvcost, 857 const MV *center_mv, 858 MV *best_mv) { 859 return vp9_hex_search(x, ref_mv, MAX(MAX_MVSEARCH_STEPS - 2, search_param), 860 sad_per_bit, do_init_search, vfp, use_mvcost, 861 center_mv, best_mv); 862} 863 864int vp9_fast_dia_search(const MACROBLOCK *x, 865 MV *ref_mv, 866 int search_param, 867 int sad_per_bit, 868 int do_init_search, 869 const vp9_variance_fn_ptr_t *vfp, 870 int use_mvcost, 871 const MV *center_mv, 872 MV *best_mv) { 873 return vp9_bigdia_search(x, ref_mv, MAX(MAX_MVSEARCH_STEPS - 2, search_param), 874 sad_per_bit, do_init_search, vfp, use_mvcost, 875 center_mv, best_mv); 876} 877 878#undef CHECK_BETTER 879 880int vp9_full_range_search_c(const MACROBLOCK *x, MV *ref_mv, MV *best_mv, 881 int search_param, int sad_per_bit, int *num00, 882 const vp9_variance_fn_ptr_t *fn_ptr, 883 int *mvjcost, int *mvcost[2], 884 const MV *center_mv) { 885 const MACROBLOCKD *const xd = &x->e_mbd; 886 const uint8_t *what = x->plane[0].src.buf; 887 const int what_stride = x->plane[0].src.stride; 888 const uint8_t *in_what; 889 const int in_what_stride = xd->plane[0].pre[0].stride; 890 891 unsigned int bestsad = INT_MAX; 892 int ref_row, ref_col; 893 894 unsigned int thissad; 895 const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; 896 897 const int *mvjsadcost = x->nmvjointsadcost; 898 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; 899 900 int tr, tc; 901 int best_tr = 0; 902 int best_tc = 0; 903 int range = 64; 904 905 int start_col, end_col; 906 int start_row, end_row; 907 int i; 908 909 clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); 910 ref_row = ref_mv->row; 911 ref_col = ref_mv->col; 912 *num00 = 11; 913 best_mv->row = ref_row; 914 best_mv->col = ref_col; 915 916 // Work out the start point for the search 917 in_what = xd->plane[0].pre[0].buf + ref_row * in_what_stride + ref_col; 918 919 // Check the starting position 920 bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff) 921 + mvsad_err_cost(best_mv, &fcenter_mv, 922 mvjsadcost, mvsadcost, sad_per_bit); 923 924 start_row = MAX(-range, x->mv_row_min - ref_row); 925 start_col = MAX(-range, x->mv_col_min - ref_col); 926 end_row = MIN(range, x->mv_row_max - ref_row); 927 end_col = MIN(range, x->mv_col_max - ref_col); 928 929 for (tr = start_row; tr <= end_row; ++tr) { 930 for (tc = start_col; tc <= end_col; tc += 4) { 931 if ((tc + 3) <= end_col) { 932 unsigned int sad_array[4]; 933 unsigned char const *addr_ref[4]; 934 for (i = 0; i < 4; ++i) 935 addr_ref[i] = in_what + tr * in_what_stride + tc + i; 936 937 fn_ptr->sdx4df(what, what_stride, addr_ref, in_what_stride, sad_array); 938 939 for (i = 0; i < 4; ++i) { 940 if (sad_array[i] < bestsad) { 941 const MV this_mv = {ref_row + tr, ref_col + tc + i}; 942 thissad = sad_array[i] + 943 mvsad_err_cost(&this_mv, &fcenter_mv, 944 mvjsadcost, mvsadcost, sad_per_bit); 945 if (thissad < bestsad) { 946 bestsad = thissad; 947 best_tr = tr; 948 best_tc = tc + i; 949 } 950 } 951 } 952 } else { 953 for (i = 0; i < end_col - tc; ++i) { 954 const uint8_t *check_here = in_what + tr * in_what_stride + tc + i; 955 thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, 956 bestsad); 957 958 if (thissad < bestsad) { 959 const MV this_mv = {ref_row + tr, ref_col + tc + i}; 960 thissad += mvsad_err_cost(&this_mv, &fcenter_mv, 961 mvjsadcost, mvsadcost, sad_per_bit); 962 963 if (thissad < bestsad) { 964 bestsad = thissad; 965 best_tr = tr; 966 best_tc = tc + i; 967 } 968 } 969 } 970 } 971 } 972 } 973 best_mv->row += best_tr; 974 best_mv->col += best_tc; 975 return bestsad; 976} 977 978int vp9_diamond_search_sad_c(const MACROBLOCK *x, 979 MV *ref_mv, MV *best_mv, 980 int search_param, int sad_per_bit, int *num00, 981 const vp9_variance_fn_ptr_t *fn_ptr, 982 int *mvjcost, int *mvcost[2], 983 const MV *center_mv) { 984 const MACROBLOCKD *const xd = &x->e_mbd; 985 const struct buf_2d *const what = &x->plane[0].src; 986 const struct buf_2d *const in_what = &xd->plane[0].pre[0]; 987 // search_param determines the length of the initial step and hence the number 988 // of iterations 989 // 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 = 990 // (MAX_FIRST_STEP/4) pel... etc. 991 const search_site *const ss = &x->ss[search_param * x->searches_per_step]; 992 const int tot_steps = (x->ss_count / x->searches_per_step) - search_param; 993 const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; 994 const int *mvjsadcost = x->nmvjointsadcost; 995 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; 996 const uint8_t *best_address; 997 int best_sad = INT_MAX; 998 int best_site = 0; 999 int last_site = 0; 1000 int i, j, step; 1001 1002 clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); 1003 best_address = get_buf_from_mv(in_what, ref_mv); 1004 *num00 = 0; 1005 *best_mv = *ref_mv; 1006 1007 // Check the starting position 1008 best_sad = fn_ptr->sdf(what->buf, what->stride, 1009 in_what->buf, in_what->stride, 0x7fffffff) + 1010 mvsad_err_cost(best_mv, &fcenter_mv, mvjsadcost, mvsadcost, sad_per_bit); 1011 1012 i = 1; 1013 1014 for (step = 0; step < tot_steps; step++) { 1015 for (j = 0; j < x->searches_per_step; j++) { 1016 const MV mv = {best_mv->row + ss[i].mv.row, 1017 best_mv->col + ss[i].mv.col}; 1018 if (is_mv_in(x, &mv)) { 1019 int sad = fn_ptr->sdf(what->buf, what->stride, 1020 best_address + ss[i].offset, in_what->stride, 1021 best_sad); 1022 if (sad < best_sad) { 1023 sad += mvsad_err_cost(&mv, &fcenter_mv, mvjsadcost, mvsadcost, 1024 sad_per_bit); 1025 if (sad < best_sad) { 1026 best_sad = sad; 1027 best_site = i; 1028 } 1029 } 1030 } 1031 1032 i++; 1033 } 1034 1035 if (best_site != last_site) { 1036 best_mv->row += ss[best_site].mv.row; 1037 best_mv->col += ss[best_site].mv.col; 1038 best_address += ss[best_site].offset; 1039 last_site = best_site; 1040#if defined(NEW_DIAMOND_SEARCH) 1041 while (1) { 1042 const MV this_mv = {best_mv->row + ss[best_site].mv.row, 1043 best_mv->col + ss[best_site].mv.col}; 1044 if (is_mv_in(x, &this_mv)) { 1045 int sad = fn_ptr->sdf(what->buf, what->stride, 1046 best_address + ss[best_site].offset, 1047 in_what->stride, best_sad); 1048 if (sad < best_sad) { 1049 sad += mvsad_err_cost(&this_mv, &fcenter_mv, 1050 mvjsadcost, mvsadcost, sad_per_bit); 1051 if (sad < best_sad) { 1052 best_sad = sad; 1053 best_mv->row += ss[best_site].mv.row; 1054 best_mv->col += ss[best_site].mv.col; 1055 best_address += ss[best_site].offset; 1056 continue; 1057 } 1058 } 1059 } 1060 break; 1061 }; 1062#endif 1063 } else if (best_address == in_what->buf) { 1064 (*num00)++; 1065 } 1066 } 1067 return best_sad; 1068} 1069 1070int vp9_diamond_search_sadx4(const MACROBLOCK *x, 1071 MV *ref_mv, MV *best_mv, int search_param, 1072 int sad_per_bit, int *num00, 1073 const vp9_variance_fn_ptr_t *fn_ptr, 1074 int *mvjcost, int *mvcost[2], 1075 const MV *center_mv) { 1076 int i, j, step; 1077 1078 const MACROBLOCKD *const xd = &x->e_mbd; 1079 uint8_t *what = x->plane[0].src.buf; 1080 const int what_stride = x->plane[0].src.stride; 1081 const uint8_t *in_what; 1082 const int in_what_stride = xd->plane[0].pre[0].stride; 1083 const uint8_t *best_address; 1084 1085 unsigned int bestsad = INT_MAX; 1086 int best_site = 0; 1087 int last_site = 0; 1088 1089 int ref_row; 1090 int ref_col; 1091 1092 // search_param determines the length of the initial step and hence the number 1093 // of iterations. 1094 // 0 = initial step (MAX_FIRST_STEP) pel 1095 // 1 = (MAX_FIRST_STEP/2) pel, 1096 // 2 = (MAX_FIRST_STEP/4) pel... 1097 const search_site *ss = &x->ss[search_param * x->searches_per_step]; 1098 const int tot_steps = (x->ss_count / x->searches_per_step) - search_param; 1099 1100 const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; 1101 1102 const int *mvjsadcost = x->nmvjointsadcost; 1103 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; 1104 1105 clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); 1106 ref_row = ref_mv->row; 1107 ref_col = ref_mv->col; 1108 *num00 = 0; 1109 best_mv->row = ref_row; 1110 best_mv->col = ref_col; 1111 1112 // Work out the start point for the search 1113 in_what = xd->plane[0].pre[0].buf + ref_row * in_what_stride + ref_col; 1114 best_address = in_what; 1115 1116 // Check the starting position 1117 bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff) 1118 + mvsad_err_cost(best_mv, &fcenter_mv, 1119 mvjsadcost, mvsadcost, sad_per_bit); 1120 1121 i = 1; 1122 1123 for (step = 0; step < tot_steps; step++) { 1124 int all_in = 1, t; 1125 1126 // All_in is true if every one of the points we are checking are within 1127 // the bounds of the image. 1128 all_in &= ((best_mv->row + ss[i].mv.row) > x->mv_row_min); 1129 all_in &= ((best_mv->row + ss[i + 1].mv.row) < x->mv_row_max); 1130 all_in &= ((best_mv->col + ss[i + 2].mv.col) > x->mv_col_min); 1131 all_in &= ((best_mv->col + ss[i + 3].mv.col) < x->mv_col_max); 1132 1133 // If all the pixels are within the bounds we don't check whether the 1134 // search point is valid in this loop, otherwise we check each point 1135 // for validity.. 1136 if (all_in) { 1137 unsigned int sad_array[4]; 1138 1139 for (j = 0; j < x->searches_per_step; j += 4) { 1140 unsigned char const *block_offset[4]; 1141 1142 for (t = 0; t < 4; t++) 1143 block_offset[t] = ss[i + t].offset + best_address; 1144 1145 fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, 1146 sad_array); 1147 1148 for (t = 0; t < 4; t++, i++) { 1149 if (sad_array[t] < bestsad) { 1150 const MV this_mv = {best_mv->row + ss[i].mv.row, 1151 best_mv->col + ss[i].mv.col}; 1152 sad_array[t] += mvsad_err_cost(&this_mv, &fcenter_mv, 1153 mvjsadcost, mvsadcost, sad_per_bit); 1154 1155 if (sad_array[t] < bestsad) { 1156 bestsad = sad_array[t]; 1157 best_site = i; 1158 } 1159 } 1160 } 1161 } 1162 } else { 1163 for (j = 0; j < x->searches_per_step; j++) { 1164 // Trap illegal vectors 1165 const MV this_mv = {best_mv->row + ss[i].mv.row, 1166 best_mv->col + ss[i].mv.col}; 1167 1168 if (is_mv_in(x, &this_mv)) { 1169 const uint8_t *const check_here = ss[i].offset + best_address; 1170 unsigned int thissad = fn_ptr->sdf(what, what_stride, check_here, 1171 in_what_stride, bestsad); 1172 1173 if (thissad < bestsad) { 1174 thissad += mvsad_err_cost(&this_mv, &fcenter_mv, 1175 mvjsadcost, mvsadcost, sad_per_bit); 1176 1177 if (thissad < bestsad) { 1178 bestsad = thissad; 1179 best_site = i; 1180 } 1181 } 1182 } 1183 i++; 1184 } 1185 } 1186 if (best_site != last_site) { 1187 best_mv->row += ss[best_site].mv.row; 1188 best_mv->col += ss[best_site].mv.col; 1189 best_address += ss[best_site].offset; 1190 last_site = best_site; 1191#if defined(NEW_DIAMOND_SEARCH) 1192 while (1) { 1193 const MV this_mv = {best_mv->row + ss[best_site].mv.row, 1194 best_mv->col + ss[best_site].mv.col}; 1195 if (is_mv_in(x, &this_mv)) { 1196 const uint8_t *const check_here = ss[best_site].offset + best_address; 1197 unsigned int thissad = fn_ptr->sdf(what, what_stride, check_here, 1198 in_what_stride, bestsad); 1199 if (thissad < bestsad) { 1200 thissad += mvsad_err_cost(&this_mv, &fcenter_mv, 1201 mvjsadcost, mvsadcost, sad_per_bit); 1202 if (thissad < bestsad) { 1203 bestsad = thissad; 1204 best_mv->row += ss[best_site].mv.row; 1205 best_mv->col += ss[best_site].mv.col; 1206 best_address += ss[best_site].offset; 1207 continue; 1208 } 1209 } 1210 } 1211 break; 1212 }; 1213#endif 1214 } else if (best_address == in_what) { 1215 (*num00)++; 1216 } 1217 } 1218 return bestsad; 1219} 1220 1221/* do_refine: If last step (1-away) of n-step search doesn't pick the center 1222 point as the best match, we will do a final 1-away diamond 1223 refining search */ 1224 1225int vp9_full_pixel_diamond(const VP9_COMP *cpi, MACROBLOCK *x, 1226 MV *mvp_full, int step_param, 1227 int sadpb, int further_steps, int do_refine, 1228 const vp9_variance_fn_ptr_t *fn_ptr, 1229 const MV *ref_mv, MV *dst_mv) { 1230 MV temp_mv; 1231 int thissme, n, num00 = 0; 1232 int bestsme = cpi->diamond_search_sad(x, mvp_full, &temp_mv, 1233 step_param, sadpb, &n, 1234 fn_ptr, x->nmvjointcost, 1235 x->mvcost, ref_mv); 1236 if (bestsme < INT_MAX) 1237 bestsme = vp9_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1); 1238 *dst_mv = temp_mv; 1239 1240 // If there won't be more n-step search, check to see if refining search is 1241 // needed. 1242 if (n > further_steps) 1243 do_refine = 0; 1244 1245 while (n < further_steps) { 1246 ++n; 1247 1248 if (num00) { 1249 num00--; 1250 } else { 1251 thissme = cpi->diamond_search_sad(x, mvp_full, &temp_mv, 1252 step_param + n, sadpb, &num00, 1253 fn_ptr, x->nmvjointcost, x->mvcost, 1254 ref_mv); 1255 if (thissme < INT_MAX) 1256 thissme = vp9_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1); 1257 1258 // check to see if refining search is needed. 1259 if (num00 > further_steps - n) 1260 do_refine = 0; 1261 1262 if (thissme < bestsme) { 1263 bestsme = thissme; 1264 *dst_mv = temp_mv; 1265 } 1266 } 1267 } 1268 1269 // final 1-away diamond refining search 1270 if (do_refine) { 1271 const int search_range = 8; 1272 MV best_mv = *dst_mv; 1273 thissme = cpi->refining_search_sad(x, &best_mv, sadpb, search_range, 1274 fn_ptr, x->nmvjointcost, x->mvcost, 1275 ref_mv); 1276 if (thissme < INT_MAX) 1277 thissme = vp9_get_mvpred_var(x, &best_mv, ref_mv, fn_ptr, 1); 1278 if (thissme < bestsme) { 1279 bestsme = thissme; 1280 *dst_mv = best_mv; 1281 } 1282 } 1283 return bestsme; 1284} 1285 1286int vp9_full_search_sad_c(const MACROBLOCK *x, const MV *ref_mv, 1287 int sad_per_bit, int distance, 1288 const vp9_variance_fn_ptr_t *fn_ptr, 1289 int *mvjcost, int *mvcost[2], 1290 const MV *center_mv, MV *best_mv) { 1291 int r, c; 1292 const MACROBLOCKD *const xd = &x->e_mbd; 1293 const struct buf_2d *const what = &x->plane[0].src; 1294 const struct buf_2d *const in_what = &xd->plane[0].pre[0]; 1295 const int row_min = MAX(ref_mv->row - distance, x->mv_row_min); 1296 const int row_max = MIN(ref_mv->row + distance, x->mv_row_max); 1297 const int col_min = MAX(ref_mv->col - distance, x->mv_col_min); 1298 const int col_max = MIN(ref_mv->col + distance, x->mv_col_max); 1299 const int *mvjsadcost = x->nmvjointsadcost; 1300 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; 1301 const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; 1302 int best_sad = fn_ptr->sdf(what->buf, what->stride, 1303 get_buf_from_mv(in_what, ref_mv), in_what->stride, 0x7fffffff) + 1304 mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, sad_per_bit); 1305 *best_mv = *ref_mv; 1306 1307 for (r = row_min; r < row_max; ++r) { 1308 for (c = col_min; c < col_max; ++c) { 1309 const MV mv = {r, c}; 1310 const int sad = fn_ptr->sdf(what->buf, what->stride, 1311 get_buf_from_mv(in_what, &mv), in_what->stride, best_sad) + 1312 mvsad_err_cost(&mv, &fcenter_mv, mvjsadcost, mvsadcost, 1313 sad_per_bit); 1314 1315 if (sad < best_sad) { 1316 best_sad = sad; 1317 *best_mv = mv; 1318 } 1319 } 1320 } 1321 return best_sad; 1322} 1323 1324int vp9_full_search_sadx3(const MACROBLOCK *x, const MV *ref_mv, 1325 int sad_per_bit, int distance, 1326 const vp9_variance_fn_ptr_t *fn_ptr, 1327 int *mvjcost, int *mvcost[2], 1328 const MV *center_mv, MV *best_mv) { 1329 const MACROBLOCKD *const xd = &x->e_mbd; 1330 const uint8_t *const what = x->plane[0].src.buf; 1331 const int what_stride = x->plane[0].src.stride; 1332 const uint8_t *const in_what = xd->plane[0].pre[0].buf; 1333 const int in_what_stride = xd->plane[0].pre[0].stride; 1334 MV this_mv; 1335 unsigned int bestsad = INT_MAX; 1336 int r, c; 1337 unsigned int thissad; 1338 int ref_row = ref_mv->row; 1339 int ref_col = ref_mv->col; 1340 1341 // Apply further limits to prevent us looking using vectors that stretch 1342 // beyond the UMV border 1343 const int row_min = MAX(ref_row - distance, x->mv_row_min); 1344 const int row_max = MIN(ref_row + distance, x->mv_row_max); 1345 const int col_min = MAX(ref_col - distance, x->mv_col_min); 1346 const int col_max = MIN(ref_col + distance, x->mv_col_max); 1347 unsigned int sad_array[3]; 1348 const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; 1349 const int *mvjsadcost = x->nmvjointsadcost; 1350 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; 1351 1352 // Work out the mid point for the search 1353 const uint8_t *bestaddress = &in_what[ref_row * in_what_stride + ref_col]; 1354 1355 best_mv->row = ref_row; 1356 best_mv->col = ref_col; 1357 1358 // Baseline value at the centre 1359 bestsad = fn_ptr->sdf(what, what_stride, 1360 bestaddress, in_what_stride, 0x7fffffff) 1361 + mvsad_err_cost(best_mv, &fcenter_mv, 1362 mvjsadcost, mvsadcost, sad_per_bit); 1363 1364 for (r = row_min; r < row_max; r++) { 1365 const uint8_t *check_here = &in_what[r * in_what_stride + col_min]; 1366 this_mv.row = r; 1367 c = col_min; 1368 1369 while ((c + 2) < col_max && fn_ptr->sdx3f != NULL) { 1370 int i; 1371 1372 fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array); 1373 1374 for (i = 0; i < 3; i++) { 1375 thissad = sad_array[i]; 1376 1377 if (thissad < bestsad) { 1378 this_mv.col = c; 1379 thissad += mvsad_err_cost(&this_mv, &fcenter_mv, 1380 mvjsadcost, mvsadcost, sad_per_bit); 1381 1382 if (thissad < bestsad) { 1383 bestsad = thissad; 1384 best_mv->row = r; 1385 best_mv->col = c; 1386 } 1387 } 1388 check_here++; 1389 c++; 1390 } 1391 } 1392 1393 while (c < col_max) { 1394 thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, 1395 bestsad); 1396 1397 if (thissad < bestsad) { 1398 this_mv.col = c; 1399 thissad += mvsad_err_cost(&this_mv, &fcenter_mv, 1400 mvjsadcost, mvsadcost, sad_per_bit); 1401 1402 if (thissad < bestsad) { 1403 bestsad = thissad; 1404 best_mv->row = r; 1405 best_mv->col = c; 1406 } 1407 } 1408 1409 check_here++; 1410 c++; 1411 } 1412 } 1413 return bestsad; 1414} 1415 1416int vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv, 1417 int sad_per_bit, int distance, 1418 const vp9_variance_fn_ptr_t *fn_ptr, 1419 int *mvjcost, int *mvcost[2], 1420 const MV *center_mv, MV *best_mv) { 1421 const MACROBLOCKD *const xd = &x->e_mbd; 1422 const uint8_t *const what = x->plane[0].src.buf; 1423 const int what_stride = x->plane[0].src.stride; 1424 const uint8_t *const in_what = xd->plane[0].pre[0].buf; 1425 const int in_what_stride = xd->plane[0].pre[0].stride; 1426 MV this_mv; 1427 unsigned int bestsad = INT_MAX; 1428 int r, c; 1429 int ref_row = ref_mv->row; 1430 int ref_col = ref_mv->col; 1431 1432 // Apply further limits to prevent us looking using vectors that stretch 1433 // beyond the UMV border 1434 const int row_min = MAX(ref_row - distance, x->mv_row_min); 1435 const int row_max = MIN(ref_row + distance, x->mv_row_max); 1436 const int col_min = MAX(ref_col - distance, x->mv_col_min); 1437 const int col_max = MIN(ref_col + distance, x->mv_col_max); 1438 DECLARE_ALIGNED_ARRAY(16, uint32_t, sad_array8, 8); 1439 unsigned int sad_array[3]; 1440 const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; 1441 1442 const int *mvjsadcost = x->nmvjointsadcost; 1443 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; 1444 1445 // Work out the mid point for the search 1446 const uint8_t *bestaddress = &in_what[ref_row * in_what_stride + ref_col]; 1447 1448 best_mv->row = ref_row; 1449 best_mv->col = ref_col; 1450 1451 // Baseline value at the center 1452 bestsad = fn_ptr->sdf(what, what_stride, 1453 bestaddress, in_what_stride, 0x7fffffff) 1454 + mvsad_err_cost(best_mv, &fcenter_mv, 1455 mvjsadcost, mvsadcost, sad_per_bit); 1456 1457 for (r = row_min; r < row_max; r++) { 1458 const uint8_t *check_here = &in_what[r * in_what_stride + col_min]; 1459 this_mv.row = r; 1460 c = col_min; 1461 1462 while ((c + 7) < col_max) { 1463 int i; 1464 1465 fn_ptr->sdx8f(what, what_stride, check_here, in_what_stride, sad_array8); 1466 1467 for (i = 0; i < 8; i++) { 1468 unsigned int thissad = (unsigned int)sad_array8[i]; 1469 1470 if (thissad < bestsad) { 1471 this_mv.col = c; 1472 thissad += mvsad_err_cost(&this_mv, &fcenter_mv, 1473 mvjsadcost, mvsadcost, sad_per_bit); 1474 1475 if (thissad < bestsad) { 1476 bestsad = thissad; 1477 best_mv->row = r; 1478 best_mv->col = c; 1479 } 1480 } 1481 1482 check_here++; 1483 c++; 1484 } 1485 } 1486 1487 while ((c + 2) < col_max && fn_ptr->sdx3f != NULL) { 1488 int i; 1489 1490 fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array); 1491 1492 for (i = 0; i < 3; i++) { 1493 unsigned int thissad = sad_array[i]; 1494 1495 if (thissad < bestsad) { 1496 this_mv.col = c; 1497 thissad += mvsad_err_cost(&this_mv, &fcenter_mv, 1498 mvjsadcost, mvsadcost, sad_per_bit); 1499 1500 if (thissad < bestsad) { 1501 bestsad = thissad; 1502 best_mv->row = r; 1503 best_mv->col = c; 1504 } 1505 } 1506 1507 check_here++; 1508 c++; 1509 } 1510 } 1511 1512 while (c < col_max) { 1513 unsigned int thissad = fn_ptr->sdf(what, what_stride, 1514 check_here, in_what_stride, bestsad); 1515 1516 if (thissad < bestsad) { 1517 this_mv.col = c; 1518 thissad += mvsad_err_cost(&this_mv, &fcenter_mv, 1519 mvjsadcost, mvsadcost, sad_per_bit); 1520 1521 if (thissad < bestsad) { 1522 bestsad = thissad; 1523 best_mv->row = r; 1524 best_mv->col = c; 1525 } 1526 } 1527 1528 check_here++; 1529 c++; 1530 } 1531 } 1532 return bestsad; 1533} 1534 1535int vp9_refining_search_sad_c(const MACROBLOCK *x, 1536 MV *ref_mv, int error_per_bit, 1537 int search_range, 1538 const vp9_variance_fn_ptr_t *fn_ptr, 1539 int *mvjcost, int *mvcost[2], 1540 const MV *center_mv) { 1541 const MV neighbors[4] = {{ -1, 0}, {0, -1}, {0, 1}, {1, 0}}; 1542 const MACROBLOCKD *const xd = &x->e_mbd; 1543 const struct buf_2d *const what = &x->plane[0].src; 1544 const struct buf_2d *const in_what = &xd->plane[0].pre[0]; 1545 const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; 1546 const int *mvjsadcost = x->nmvjointsadcost; 1547 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; 1548 1549 unsigned int best_sad = fn_ptr->sdf(what->buf, what->stride, 1550 get_buf_from_mv(in_what, ref_mv), 1551 in_what->stride, 0x7fffffff) + 1552 mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit); 1553 int i, j; 1554 1555 for (i = 0; i < search_range; i++) { 1556 int best_site = -1; 1557 1558 for (j = 0; j < 4; j++) { 1559 const MV mv = {ref_mv->row + neighbors[j].row, 1560 ref_mv->col + neighbors[j].col}; 1561 if (is_mv_in(x, &mv)) { 1562 unsigned int sad = fn_ptr->sdf(what->buf, what->stride, 1563 get_buf_from_mv(in_what, &mv), in_what->stride, best_sad); 1564 if (sad < best_sad) { 1565 sad += mvsad_err_cost(&mv, &fcenter_mv, mvjsadcost, mvsadcost, 1566 error_per_bit); 1567 if (sad < best_sad) { 1568 best_sad = sad; 1569 best_site = j; 1570 } 1571 } 1572 } 1573 } 1574 1575 if (best_site == -1) { 1576 break; 1577 } else { 1578 ref_mv->row += neighbors[best_site].row; 1579 ref_mv->col += neighbors[best_site].col; 1580 } 1581 } 1582 return best_sad; 1583} 1584 1585int vp9_refining_search_sadx4(const MACROBLOCK *x, 1586 MV *ref_mv, int error_per_bit, 1587 int search_range, 1588 const vp9_variance_fn_ptr_t *fn_ptr, 1589 int *mvjcost, int *mvcost[2], 1590 const MV *center_mv) { 1591 const MACROBLOCKD *const xd = &x->e_mbd; 1592 const MV neighbors[4] = {{ -1, 0}, {0, -1}, {0, 1}, {1, 0}}; 1593 const struct buf_2d *const what = &x->plane[0].src; 1594 const struct buf_2d *const in_what = &xd->plane[0].pre[0]; 1595 const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; 1596 const int *mvjsadcost = x->nmvjointsadcost; 1597 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; 1598 const uint8_t *best_address = get_buf_from_mv(in_what, ref_mv); 1599 unsigned int best_sad = fn_ptr->sdf(what->buf, what->stride, best_address, 1600 in_what->stride, 0x7fffffff) + 1601 mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit); 1602 int i, j; 1603 1604 for (i = 0; i < search_range; i++) { 1605 int best_site = -1; 1606 const int all_in = ((ref_mv->row - 1) > x->mv_row_min) & 1607 ((ref_mv->row + 1) < x->mv_row_max) & 1608 ((ref_mv->col - 1) > x->mv_col_min) & 1609 ((ref_mv->col + 1) < x->mv_col_max); 1610 1611 if (all_in) { 1612 unsigned int sads[4]; 1613 const uint8_t *const positions[4] = { 1614 best_address - in_what->stride, 1615 best_address - 1, 1616 best_address + 1, 1617 best_address + in_what->stride 1618 }; 1619 1620 fn_ptr->sdx4df(what->buf, what->stride, positions, in_what->stride, sads); 1621 1622 for (j = 0; j < 4; ++j) { 1623 if (sads[j] < best_sad) { 1624 const MV mv = {ref_mv->row + neighbors[j].row, 1625 ref_mv->col + neighbors[j].col}; 1626 sads[j] += mvsad_err_cost(&mv, &fcenter_mv, 1627 mvjsadcost, mvsadcost, error_per_bit); 1628 1629 if (sads[j] < best_sad) { 1630 best_sad = sads[j]; 1631 best_site = j; 1632 } 1633 } 1634 } 1635 } else { 1636 for (j = 0; j < 4; ++j) { 1637 const MV mv = {ref_mv->row + neighbors[j].row, 1638 ref_mv->col + neighbors[j].col}; 1639 1640 if (is_mv_in(x, &mv)) { 1641 unsigned int sad = fn_ptr->sdf(what->buf, what->stride, 1642 get_buf_from_mv(in_what, &mv), 1643 in_what->stride, best_sad); 1644 if (sad < best_sad) { 1645 sad += mvsad_err_cost(&mv, &fcenter_mv, 1646 mvjsadcost, mvsadcost, error_per_bit); 1647 1648 if (sad < best_sad) { 1649 best_sad = sad; 1650 best_site = j; 1651 } 1652 } 1653 } 1654 } 1655 } 1656 1657 if (best_site == -1) { 1658 break; 1659 } else { 1660 ref_mv->row += neighbors[best_site].row; 1661 ref_mv->col += neighbors[best_site].col; 1662 best_address = get_buf_from_mv(in_what, ref_mv); 1663 } 1664 } 1665 1666 return best_sad; 1667} 1668 1669// This function is called when we do joint motion search in comp_inter_inter 1670// mode. 1671int vp9_refining_search_8p_c(const MACROBLOCK *x, 1672 MV *ref_mv, int error_per_bit, 1673 int search_range, 1674 const vp9_variance_fn_ptr_t *fn_ptr, 1675 int *mvjcost, int *mvcost[2], 1676 const MV *center_mv, 1677 const uint8_t *second_pred, int w, int h) { 1678 const MV neighbors[8] = {{-1, 0}, {0, -1}, {0, 1}, {1, 0}, 1679 {-1, -1}, {1, -1}, {-1, 1}, {1, 1}}; 1680 const MACROBLOCKD *const xd = &x->e_mbd; 1681 const struct buf_2d *const what = &x->plane[0].src; 1682 const struct buf_2d *const in_what = &xd->plane[0].pre[0]; 1683 const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; 1684 const int *mvjsadcost = x->nmvjointsadcost; 1685 int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; 1686 unsigned int best_sad = fn_ptr->sdaf(what->buf, what->stride, 1687 get_buf_from_mv(in_what, ref_mv), in_what->stride, 1688 second_pred, 0x7fffffff) + 1689 mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit); 1690 int i, j; 1691 1692 for (i = 0; i < search_range; ++i) { 1693 int best_site = -1; 1694 1695 for (j = 0; j < 8; ++j) { 1696 const MV mv = {ref_mv->row + neighbors[j].row, 1697 ref_mv->col + neighbors[j].col}; 1698 1699 if (is_mv_in(x, &mv)) { 1700 unsigned int sad = fn_ptr->sdaf(what->buf, what->stride, 1701 get_buf_from_mv(in_what, &mv), in_what->stride, 1702 second_pred, best_sad); 1703 if (sad < best_sad) { 1704 sad += mvsad_err_cost(&mv, &fcenter_mv, 1705 mvjsadcost, mvsadcost, error_per_bit); 1706 if (sad < best_sad) { 1707 best_sad = sad; 1708 best_site = j; 1709 } 1710 } 1711 } 1712 } 1713 1714 if (best_site == -1) { 1715 break; 1716 } else { 1717 ref_mv->row += neighbors[best_site].row; 1718 ref_mv->col += neighbors[best_site].col; 1719 } 1720 } 1721 return best_sad; 1722} 1723