11d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert/* 21d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 31d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * 41d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * Use of this source code is governed by a BSD-style license 51d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * that can be found in the LICENSE file in the root of the source 61d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * tree. An additional intellectual property rights grant can be found 71d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * in the file PATENTS. All contributing project authors may 81d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * be found in the AUTHORS file in the root of the source tree. 91d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert */ 101d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert 111d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert 121d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert#include "onyx_int.h" 131d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert#include "mcomp.h" 141d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert#include "vpx_mem/vpx_mem.h" 151d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert#include "vpx_config.h" 161d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert#include <stdio.h> 171d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert#include <limits.h> 181d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert#include <math.h> 191d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert#include "vp8/common/findnearmv.h" 201d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert#include "vp8/common/common.h" 211d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert 221d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert#ifdef VP8_ENTROPY_STATS 231d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringertstatic int mv_ref_ct [31] [4] [2]; 241d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringertstatic int mv_mode_cts [4] [2]; 251d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert#endif 261d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert 271d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringertint vp8_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int Weight) 281d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert{ 291d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert /* MV costing is based on the distribution of vectors in the previous 301d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * frame and as such will tend to over state the cost of vectors. In 311d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * addition coding a new vector can have a knock on effect on the cost 321d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * of subsequent vectors and the quality of prediction from NEAR and 331d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * NEAREST for subsequent blocks. The "Weight" parameter allows, to a 341d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * limited extent, for some account to be taken of these factors. 351d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert */ 361d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert return ((mvcost[0][(mv->as_mv.row - ref->as_mv.row) >> 1] + mvcost[1][(mv->as_mv.col - ref->as_mv.col) >> 1]) * Weight) >> 7; 371d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert} 381d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert 391d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringertstatic int mv_err_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int error_per_bit) 401d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert{ 411d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert /* Ignore mv costing if mvcost is NULL */ 421d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert if (mvcost) 431d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert return ((mvcost[0][(mv->as_mv.row - ref->as_mv.row) >> 1] + 441d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert mvcost[1][(mv->as_mv.col - ref->as_mv.col) >> 1]) 451d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * error_per_bit + 128) >> 8; 461d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert return 0; 471d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert} 481d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert 491d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringertstatic int mvsad_err_cost(int_mv *mv, int_mv *ref, int *mvsadcost[2], int error_per_bit) 501d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert{ 511d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert /* Calculate sad error cost on full pixel basis. */ 521d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert /* Ignore mv costing if mvsadcost is NULL */ 531d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert if (mvsadcost) 541d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert return ((mvsadcost[0][(mv->as_mv.row - ref->as_mv.row)] + 551d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert mvsadcost[1][(mv->as_mv.col - ref->as_mv.col)]) 561d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * error_per_bit + 128) >> 8; 571d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert return 0; 581d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert} 591d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert 601d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringertvoid vp8_init_dsmotion_compensation(MACROBLOCK *x, int stride) 611d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert{ 621d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert int Len; 631d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert int search_site_count = 0; 641d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert 651d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert 661d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert /* Generate offsets for 4 search sites per step. */ 671d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert Len = MAX_FIRST_STEP; 681d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert x->ss[search_site_count].mv.col = 0; 691d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert x->ss[search_site_count].mv.row = 0; 701d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert x->ss[search_site_count].offset = 0; 711d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert search_site_count++; 721d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert 731d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert while (Len > 0) 741d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert { 751d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert 761d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert /* Compute offsets for search sites. */ 771d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert x->ss[search_site_count].mv.col = 0; 781d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert x->ss[search_site_count].mv.row = -Len; 791d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert x->ss[search_site_count].offset = -Len * stride; 801d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert search_site_count++; 811d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert 821d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert /* Compute offsets for search sites. */ 831d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert x->ss[search_site_count].mv.col = 0; 841d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert x->ss[search_site_count].mv.row = Len; 851d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert x->ss[search_site_count].offset = Len * stride; 861d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert search_site_count++; 871d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert 881d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert /* Compute offsets for search sites. */ 891d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert x->ss[search_site_count].mv.col = -Len; 901d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert x->ss[search_site_count].mv.row = 0; 911d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert x->ss[search_site_count].offset = -Len; 921d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert search_site_count++; 931d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert 941d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert /* Compute offsets for search sites. */ 951d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert x->ss[search_site_count].mv.col = Len; 961d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert x->ss[search_site_count].mv.row = 0; 971d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert x->ss[search_site_count].offset = Len; 981d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert search_site_count++; 991d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert 1001d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert /* Contract. */ 1011d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert Len /= 2; 1021d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert } 1031d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert 1041d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert x->ss_count = search_site_count; 1051d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert x->searches_per_step = 4; 1061d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert} 1071d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert 1081d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringertvoid vp8_init3smotion_compensation(MACROBLOCK *x, int stride) 1091d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert{ 1101d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert int Len; 1111d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert int search_site_count = 0; 1121d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert 1131d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert /* Generate offsets for 8 search sites per step. */ 1141d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert Len = MAX_FIRST_STEP; 1151d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert x->ss[search_site_count].mv.col = 0; 1161d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert x->ss[search_site_count].mv.row = 0; 1171d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert x->ss[search_site_count].offset = 0; 1181d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert search_site_count++; 1191d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert 1201d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert while (Len > 0) 1211d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert { 1221d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert 1231d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert /* Compute offsets for search sites. */ 1241d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert x->ss[search_site_count].mv.col = 0; 1251d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert x->ss[search_site_count].mv.row = -Len; 1261d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert x->ss[search_site_count].offset = -Len * stride; 1271d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert search_site_count++; 1281d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert 1291d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert /* Compute offsets for search sites. */ 1301d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert x->ss[search_site_count].mv.col = 0; 1311d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert x->ss[search_site_count].mv.row = Len; 1321d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert x->ss[search_site_count].offset = Len * stride; 1331d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert search_site_count++; 1341d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert 1351d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert /* Compute offsets for search sites. */ 1361d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert x->ss[search_site_count].mv.col = -Len; 1371d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert x->ss[search_site_count].mv.row = 0; 1381d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert x->ss[search_site_count].offset = -Len; 1391d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert search_site_count++; 1401d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert 1411d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert /* Compute offsets for search sites. */ 1421d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert x->ss[search_site_count].mv.col = Len; 1431d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert x->ss[search_site_count].mv.row = 0; 1441d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert x->ss[search_site_count].offset = Len; 1451d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert search_site_count++; 1461d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert 1471d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert /* Compute offsets for search sites. */ 1481d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert x->ss[search_site_count].mv.col = -Len; 1491d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert x->ss[search_site_count].mv.row = -Len; 1501d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert x->ss[search_site_count].offset = -Len * stride - Len; 1511d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert search_site_count++; 1521d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert 1531d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert /* Compute offsets for search sites. */ 1541d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert x->ss[search_site_count].mv.col = Len; 1551d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert x->ss[search_site_count].mv.row = -Len; 1561d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert x->ss[search_site_count].offset = -Len * stride + Len; 1571d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert search_site_count++; 1581d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert 1591d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert /* Compute offsets for search sites. */ 1601d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert x->ss[search_site_count].mv.col = -Len; 1611d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert x->ss[search_site_count].mv.row = Len; 1621d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert x->ss[search_site_count].offset = Len * stride - Len; 1631d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert search_site_count++; 1641d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert 1651d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert /* Compute offsets for search sites. */ 1661d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert x->ss[search_site_count].mv.col = Len; 1671d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert x->ss[search_site_count].mv.row = Len; 1681d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert x->ss[search_site_count].offset = Len * stride + Len; 1691d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert search_site_count++; 1701d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert 1711d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert 1721d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert /* Contract. */ 1731d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert Len /= 2; 1741d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert } 1751d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert 1761d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert x->ss_count = search_site_count; 1771d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert x->searches_per_step = 8; 1781d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert} 1791d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert 1801d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert/* 1811d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * To avoid the penalty for crossing cache-line read, preload the reference 1821d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * area in a small buffer, which is aligned to make sure there won't be crossing 1831d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * cache-line read while reading from this buffer. This reduced the cpu 1841d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * cycles spent on reading ref data in sub-pixel filter functions. 1851d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * TODO: Currently, since sub-pixel search range here is -3 ~ 3, copy 22 rows x 1861d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * 32 cols area that is enough for 16x16 macroblock. Later, for SPLITMV, we 1871d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert * could reduce the area. 1881d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert */ 1891d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert 1901d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert/* estimated cost of a motion vector (r,c) */ 1911d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert#define MVC(r,c) (mvcost ? ((mvcost[0][(r)-rr] + mvcost[1][(c) - rc]) * error_per_bit + 128 )>>8 : 0) 1921d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert/* pointer to predictor base of a motionvector */ 1931d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert#define PRE(r,c) (y + (((r)>>2) * y_stride + ((c)>>2) -(offset))) 1941d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert/* convert motion vector component to offset for svf calc */ 1951d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert#define SP(x) (((x)&3)<<1) 1961d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert/* returns subpixel variance error function. */ 1971d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert#define DIST(r,c) vfp->svf( PRE(r,c), y_stride, SP(c),SP(r), z,b->src_stride,&sse) 1981d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert#define IFMVCV(r,c,s,e) if ( c >= minc && c <= maxc && r >= minr && r <= maxr) s else e; 1991d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert/* returns distortion + motion vector cost */ 2001d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert#define ERR(r,c) (MVC(r,c)+DIST(r,c)) 2011d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert/* checks if (r,c) has better score than previous best */ 2021d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert#define CHECK_BETTER(v,r,c) IFMVCV(r,c,{thismse = DIST(r,c); if((v = (MVC(r,c)+thismse)) < besterr) { besterr = v; br=r; bc=c; *distortion = thismse; *sse1 = sse; }}, v=UINT_MAX;) 2031d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringert 2041d580d0f6ee4f21eb309ba7b509d2c6d671c4044Bjorn Bringertint vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, 205 int_mv *bestmv, int_mv *ref_mv, 206 int error_per_bit, 207 const vp8_variance_fn_ptr_t *vfp, 208 int *mvcost[2], int *distortion, 209 unsigned int *sse1) 210{ 211 unsigned char *z = (*(b->base_src) + b->src); 212 213 int rr = ref_mv->as_mv.row >> 1, rc = ref_mv->as_mv.col >> 1; 214 int br = bestmv->as_mv.row * 4, bc = bestmv->as_mv.col * 4; 215 int tr = br, tc = bc; 216 unsigned int besterr; 217 unsigned int left, right, up, down, diag; 218 unsigned int sse; 219 unsigned int whichdir; 220 unsigned int halfiters = 4; 221 unsigned int quarteriters = 4; 222 int thismse; 223 224 int minc = MAX(x->mv_col_min * 4, 225 (ref_mv->as_mv.col >> 1) - ((1 << mvlong_width) - 1)); 226 int maxc = MIN(x->mv_col_max * 4, 227 (ref_mv->as_mv.col >> 1) + ((1 << mvlong_width) - 1)); 228 int minr = MAX(x->mv_row_min * 4, 229 (ref_mv->as_mv.row >> 1) - ((1 << mvlong_width) - 1)); 230 int maxr = MIN(x->mv_row_max * 4, 231 (ref_mv->as_mv.row >> 1) + ((1 << mvlong_width) - 1)); 232 233 int y_stride; 234 int offset; 235 int pre_stride = x->e_mbd.pre.y_stride; 236 unsigned char *base_pre = x->e_mbd.pre.y_buffer; 237 238 239#if ARCH_X86 || ARCH_X86_64 240 MACROBLOCKD *xd = &x->e_mbd; 241 unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col; 242 unsigned char *y; 243 int buf_r1, buf_r2, buf_c1; 244 245 /* Clamping to avoid out-of-range data access */ 246 buf_r1 = ((bestmv->as_mv.row - 3) < x->mv_row_min)?(bestmv->as_mv.row - x->mv_row_min):3; 247 buf_r2 = ((bestmv->as_mv.row + 3) > x->mv_row_max)?(x->mv_row_max - bestmv->as_mv.row):3; 248 buf_c1 = ((bestmv->as_mv.col - 3) < x->mv_col_min)?(bestmv->as_mv.col - x->mv_col_min):3; 249 y_stride = 32; 250 251 /* Copy to intermediate buffer before searching. */ 252 vfp->copymem(y_0 - buf_c1 - pre_stride*buf_r1, pre_stride, xd->y_buf, y_stride, 16+buf_r1+buf_r2); 253 y = xd->y_buf + y_stride*buf_r1 +buf_c1; 254#else 255 unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col; 256 y_stride = pre_stride; 257#endif 258 259 offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col; 260 261 /* central mv */ 262 bestmv->as_mv.row *= 8; 263 bestmv->as_mv.col *= 8; 264 265 /* calculate central point error */ 266 besterr = vfp->vf(y, y_stride, z, b->src_stride, sse1); 267 *distortion = besterr; 268 besterr += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit); 269 270 /* TODO: Each subsequent iteration checks at least one point in common 271 * with the last iteration could be 2 ( if diag selected) 272 */ 273 while (--halfiters) 274 { 275 /* 1/2 pel */ 276 CHECK_BETTER(left, tr, tc - 2); 277 CHECK_BETTER(right, tr, tc + 2); 278 CHECK_BETTER(up, tr - 2, tc); 279 CHECK_BETTER(down, tr + 2, tc); 280 281 whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); 282 283 switch (whichdir) 284 { 285 case 0: 286 CHECK_BETTER(diag, tr - 2, tc - 2); 287 break; 288 case 1: 289 CHECK_BETTER(diag, tr - 2, tc + 2); 290 break; 291 case 2: 292 CHECK_BETTER(diag, tr + 2, tc - 2); 293 break; 294 case 3: 295 CHECK_BETTER(diag, tr + 2, tc + 2); 296 break; 297 } 298 299 /* no reason to check the same one again. */ 300 if (tr == br && tc == bc) 301 break; 302 303 tr = br; 304 tc = bc; 305 } 306 307 /* TODO: Each subsequent iteration checks at least one point in common 308 * with the last iteration could be 2 ( if diag selected) 309 */ 310 311 /* 1/4 pel */ 312 while (--quarteriters) 313 { 314 CHECK_BETTER(left, tr, tc - 1); 315 CHECK_BETTER(right, tr, tc + 1); 316 CHECK_BETTER(up, tr - 1, tc); 317 CHECK_BETTER(down, tr + 1, tc); 318 319 whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); 320 321 switch (whichdir) 322 { 323 case 0: 324 CHECK_BETTER(diag, tr - 1, tc - 1); 325 break; 326 case 1: 327 CHECK_BETTER(diag, tr - 1, tc + 1); 328 break; 329 case 2: 330 CHECK_BETTER(diag, tr + 1, tc - 1); 331 break; 332 case 3: 333 CHECK_BETTER(diag, tr + 1, tc + 1); 334 break; 335 } 336 337 /* no reason to check the same one again. */ 338 if (tr == br && tc == bc) 339 break; 340 341 tr = br; 342 tc = bc; 343 } 344 345 bestmv->as_mv.row = br * 2; 346 bestmv->as_mv.col = bc * 2; 347 348 if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > (MAX_FULL_PEL_VAL<<3)) || 349 (abs(bestmv->as_mv.row - ref_mv->as_mv.row) > (MAX_FULL_PEL_VAL<<3))) 350 return INT_MAX; 351 352 return besterr; 353} 354#undef MVC 355#undef PRE 356#undef SP 357#undef DIST 358#undef IFMVCV 359#undef ERR 360#undef CHECK_BETTER 361 362int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, 363 int_mv *bestmv, int_mv *ref_mv, 364 int error_per_bit, 365 const vp8_variance_fn_ptr_t *vfp, 366 int *mvcost[2], int *distortion, 367 unsigned int *sse1) 368{ 369 int bestmse = INT_MAX; 370 int_mv startmv; 371 int_mv this_mv; 372 unsigned char *z = (*(b->base_src) + b->src); 373 int left, right, up, down, diag; 374 unsigned int sse; 375 int whichdir ; 376 int thismse; 377 int y_stride; 378 int pre_stride = x->e_mbd.pre.y_stride; 379 unsigned char *base_pre = x->e_mbd.pre.y_buffer; 380 381#if ARCH_X86 || ARCH_X86_64 382 MACROBLOCKD *xd = &x->e_mbd; 383 unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col; 384 unsigned char *y; 385 386 y_stride = 32; 387 /* Copy 18 rows x 32 cols area to intermediate buffer before searching. */ 388 vfp->copymem(y_0 - 1 - pre_stride, pre_stride, xd->y_buf, y_stride, 18); 389 y = xd->y_buf + y_stride + 1; 390#else 391 unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col; 392 y_stride = pre_stride; 393#endif 394 395 /* central mv */ 396 bestmv->as_mv.row <<= 3; 397 bestmv->as_mv.col <<= 3; 398 startmv = *bestmv; 399 400 /* calculate central point error */ 401 bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1); 402 *distortion = bestmse; 403 bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit); 404 405 /* go left then right and check error */ 406 this_mv.as_mv.row = startmv.as_mv.row; 407 this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4); 408 thismse = vfp->svf_halfpix_h(y - 1, y_stride, z, b->src_stride, &sse); 409 left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); 410 411 if (left < bestmse) 412 { 413 *bestmv = this_mv; 414 bestmse = left; 415 *distortion = thismse; 416 *sse1 = sse; 417 } 418 419 this_mv.as_mv.col += 8; 420 thismse = vfp->svf_halfpix_h(y, y_stride, z, b->src_stride, &sse); 421 right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); 422 423 if (right < bestmse) 424 { 425 *bestmv = this_mv; 426 bestmse = right; 427 *distortion = thismse; 428 *sse1 = sse; 429 } 430 431 /* go up then down and check error */ 432 this_mv.as_mv.col = startmv.as_mv.col; 433 this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4); 434 thismse = vfp->svf_halfpix_v(y - y_stride, y_stride, z, b->src_stride, &sse); 435 up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); 436 437 if (up < bestmse) 438 { 439 *bestmv = this_mv; 440 bestmse = up; 441 *distortion = thismse; 442 *sse1 = sse; 443 } 444 445 this_mv.as_mv.row += 8; 446 thismse = vfp->svf_halfpix_v(y, y_stride, z, b->src_stride, &sse); 447 down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); 448 449 if (down < bestmse) 450 { 451 *bestmv = this_mv; 452 bestmse = down; 453 *distortion = thismse; 454 *sse1 = sse; 455 } 456 457 458 /* now check 1 more diagonal */ 459 whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); 460 this_mv = startmv; 461 462 switch (whichdir) 463 { 464 case 0: 465 this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4; 466 this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4; 467 thismse = vfp->svf_halfpix_hv(y - 1 - y_stride, y_stride, z, b->src_stride, &sse); 468 break; 469 case 1: 470 this_mv.as_mv.col += 4; 471 this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4; 472 thismse = vfp->svf_halfpix_hv(y - y_stride, y_stride, z, b->src_stride, &sse); 473 break; 474 case 2: 475 this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4; 476 this_mv.as_mv.row += 4; 477 thismse = vfp->svf_halfpix_hv(y - 1, y_stride, z, b->src_stride, &sse); 478 break; 479 case 3: 480 default: 481 this_mv.as_mv.col += 4; 482 this_mv.as_mv.row += 4; 483 thismse = vfp->svf_halfpix_hv(y, y_stride, z, b->src_stride, &sse); 484 break; 485 } 486 487 diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); 488 489 if (diag < bestmse) 490 { 491 *bestmv = this_mv; 492 bestmse = diag; 493 *distortion = thismse; 494 *sse1 = sse; 495 } 496 497 498 /* time to check quarter pels. */ 499 if (bestmv->as_mv.row < startmv.as_mv.row) 500 y -= y_stride; 501 502 if (bestmv->as_mv.col < startmv.as_mv.col) 503 y--; 504 505 startmv = *bestmv; 506 507 508 509 /* go left then right and check error */ 510 this_mv.as_mv.row = startmv.as_mv.row; 511 512 if (startmv.as_mv.col & 7) 513 { 514 this_mv.as_mv.col = startmv.as_mv.col - 2; 515 thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse); 516 } 517 else 518 { 519 this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6; 520 thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z, b->src_stride, &sse); 521 } 522 523 left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); 524 525 if (left < bestmse) 526 { 527 *bestmv = this_mv; 528 bestmse = left; 529 *distortion = thismse; 530 *sse1 = sse; 531 } 532 533 this_mv.as_mv.col += 4; 534 thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse); 535 right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); 536 537 if (right < bestmse) 538 { 539 *bestmv = this_mv; 540 bestmse = right; 541 *distortion = thismse; 542 *sse1 = sse; 543 } 544 545 /* go up then down and check error */ 546 this_mv.as_mv.col = startmv.as_mv.col; 547 548 if (startmv.as_mv.row & 7) 549 { 550 this_mv.as_mv.row = startmv.as_mv.row - 2; 551 thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse); 552 } 553 else 554 { 555 this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6; 556 thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6, z, b->src_stride, &sse); 557 } 558 559 up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); 560 561 if (up < bestmse) 562 { 563 *bestmv = this_mv; 564 bestmse = up; 565 *distortion = thismse; 566 *sse1 = sse; 567 } 568 569 this_mv.as_mv.row += 4; 570 thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse); 571 down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); 572 573 if (down < bestmse) 574 { 575 *bestmv = this_mv; 576 bestmse = down; 577 *distortion = thismse; 578 *sse1 = sse; 579 } 580 581 582 /* now check 1 more diagonal */ 583 whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); 584 585 this_mv = startmv; 586 587 switch (whichdir) 588 { 589 case 0: 590 591 if (startmv.as_mv.row & 7) 592 { 593 this_mv.as_mv.row -= 2; 594 595 if (startmv.as_mv.col & 7) 596 { 597 this_mv.as_mv.col -= 2; 598 thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse); 599 } 600 else 601 { 602 this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6; 603 thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z, b->src_stride, &sse);; 604 } 605 } 606 else 607 { 608 this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6; 609 610 if (startmv.as_mv.col & 7) 611 { 612 this_mv.as_mv.col -= 2; 613 thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6, z, b->src_stride, &sse); 614 } 615 else 616 { 617 this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6; 618 thismse = vfp->svf(y - y_stride - 1, y_stride, 6, 6, z, b->src_stride, &sse); 619 } 620 } 621 622 break; 623 case 1: 624 this_mv.as_mv.col += 2; 625 626 if (startmv.as_mv.row & 7) 627 { 628 this_mv.as_mv.row -= 2; 629 thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse); 630 } 631 else 632 { 633 this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6; 634 thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6, z, b->src_stride, &sse); 635 } 636 637 break; 638 case 2: 639 this_mv.as_mv.row += 2; 640 641 if (startmv.as_mv.col & 7) 642 { 643 this_mv.as_mv.col -= 2; 644 thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse); 645 } 646 else 647 { 648 this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6; 649 thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z, b->src_stride, &sse); 650 } 651 652 break; 653 case 3: 654 this_mv.as_mv.col += 2; 655 this_mv.as_mv.row += 2; 656 thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse); 657 break; 658 } 659 660 diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); 661 662 if (diag < bestmse) 663 { 664 *bestmv = this_mv; 665 bestmse = diag; 666 *distortion = thismse; 667 *sse1 = sse; 668 } 669 670 return bestmse; 671} 672 673int vp8_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, 674 int_mv *bestmv, int_mv *ref_mv, 675 int error_per_bit, 676 const vp8_variance_fn_ptr_t *vfp, 677 int *mvcost[2], int *distortion, 678 unsigned int *sse1) 679{ 680 int bestmse = INT_MAX; 681 int_mv startmv; 682 int_mv this_mv; 683 unsigned char *z = (*(b->base_src) + b->src); 684 int left, right, up, down, diag; 685 unsigned int sse; 686 int whichdir ; 687 int thismse; 688 int y_stride; 689 int pre_stride = x->e_mbd.pre.y_stride; 690 unsigned char *base_pre = x->e_mbd.pre.y_buffer; 691 692#if ARCH_X86 || ARCH_X86_64 693 MACROBLOCKD *xd = &x->e_mbd; 694 unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col; 695 unsigned char *y; 696 697 y_stride = 32; 698 /* Copy 18 rows x 32 cols area to intermediate buffer before searching. */ 699 vfp->copymem(y_0 - 1 - pre_stride, pre_stride, xd->y_buf, y_stride, 18); 700 y = xd->y_buf + y_stride + 1; 701#else 702 unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col; 703 y_stride = pre_stride; 704#endif 705 706 /* central mv */ 707 bestmv->as_mv.row *= 8; 708 bestmv->as_mv.col *= 8; 709 startmv = *bestmv; 710 711 /* calculate central point error */ 712 bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1); 713 *distortion = bestmse; 714 bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit); 715 716 /* go left then right and check error */ 717 this_mv.as_mv.row = startmv.as_mv.row; 718 this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4); 719 thismse = vfp->svf_halfpix_h(y - 1, y_stride, z, b->src_stride, &sse); 720 left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); 721 722 if (left < bestmse) 723 { 724 *bestmv = this_mv; 725 bestmse = left; 726 *distortion = thismse; 727 *sse1 = sse; 728 } 729 730 this_mv.as_mv.col += 8; 731 thismse = vfp->svf_halfpix_h(y, y_stride, z, b->src_stride, &sse); 732 right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); 733 734 if (right < bestmse) 735 { 736 *bestmv = this_mv; 737 bestmse = right; 738 *distortion = thismse; 739 *sse1 = sse; 740 } 741 742 /* go up then down and check error */ 743 this_mv.as_mv.col = startmv.as_mv.col; 744 this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4); 745 thismse = vfp->svf_halfpix_v(y - y_stride, y_stride, z, b->src_stride, &sse); 746 up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); 747 748 if (up < bestmse) 749 { 750 *bestmv = this_mv; 751 bestmse = up; 752 *distortion = thismse; 753 *sse1 = sse; 754 } 755 756 this_mv.as_mv.row += 8; 757 thismse = vfp->svf_halfpix_v(y, y_stride, z, b->src_stride, &sse); 758 down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); 759 760 if (down < bestmse) 761 { 762 *bestmv = this_mv; 763 bestmse = down; 764 *distortion = thismse; 765 *sse1 = sse; 766 } 767 768 /* now check 1 more diagonal - */ 769 whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); 770 this_mv = startmv; 771 772 switch (whichdir) 773 { 774 case 0: 775 this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4; 776 this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4; 777 thismse = vfp->svf_halfpix_hv(y - 1 - y_stride, y_stride, z, b->src_stride, &sse); 778 break; 779 case 1: 780 this_mv.as_mv.col += 4; 781 this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4; 782 thismse = vfp->svf_halfpix_hv(y - y_stride, y_stride, z, b->src_stride, &sse); 783 break; 784 case 2: 785 this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4; 786 this_mv.as_mv.row += 4; 787 thismse = vfp->svf_halfpix_hv(y - 1, y_stride, z, b->src_stride, &sse); 788 break; 789 case 3: 790 default: 791 this_mv.as_mv.col += 4; 792 this_mv.as_mv.row += 4; 793 thismse = vfp->svf_halfpix_hv(y, y_stride, z, b->src_stride, &sse); 794 break; 795 } 796 797 diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); 798 799 if (diag < bestmse) 800 { 801 *bestmv = this_mv; 802 bestmse = diag; 803 *distortion = thismse; 804 *sse1 = sse; 805 } 806 807 return bestmse; 808} 809 810#define CHECK_BOUNDS(range) \ 811{\ 812 all_in = 1;\ 813 all_in &= ((br-range) >= x->mv_row_min);\ 814 all_in &= ((br+range) <= x->mv_row_max);\ 815 all_in &= ((bc-range) >= x->mv_col_min);\ 816 all_in &= ((bc+range) <= x->mv_col_max);\ 817} 818 819#define CHECK_POINT \ 820{\ 821 if (this_mv.as_mv.col < x->mv_col_min) continue;\ 822 if (this_mv.as_mv.col > x->mv_col_max) continue;\ 823 if (this_mv.as_mv.row < x->mv_row_min) continue;\ 824 if (this_mv.as_mv.row > x->mv_row_max) continue;\ 825} 826 827#define CHECK_BETTER \ 828{\ 829 if (thissad < bestsad)\ 830 {\ 831 thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);\ 832 if (thissad < bestsad)\ 833 {\ 834 bestsad = thissad;\ 835 best_site = i;\ 836 }\ 837 }\ 838} 839 840static const MV next_chkpts[6][3] = 841{ 842 {{ -2, 0}, { -1, -2}, {1, -2}}, 843 {{ -1, -2}, {1, -2}, {2, 0}}, 844 {{1, -2}, {2, 0}, {1, 2}}, 845 {{2, 0}, {1, 2}, { -1, 2}}, 846 {{1, 2}, { -1, 2}, { -2, 0}}, 847 {{ -1, 2}, { -2, 0}, { -1, -2}} 848}; 849 850int vp8_hex_search 851( 852 MACROBLOCK *x, 853 BLOCK *b, 854 BLOCKD *d, 855 int_mv *ref_mv, 856 int_mv *best_mv, 857 int search_param, 858 int sad_per_bit, 859 const vp8_variance_fn_ptr_t *vfp, 860 int *mvsadcost[2], 861 int *mvcost[2], 862 int_mv *center_mv 863) 864{ 865 MV hex[6] = { { -1, -2}, {1, -2}, {2, 0}, {1, 2}, { -1, 2}, { -2, 0} } ; 866 MV neighbors[4] = {{0, -1}, { -1, 0}, {1, 0}, {0, 1}} ; 867 int i, j; 868 869 unsigned char *what = (*(b->base_src) + b->src); 870 int what_stride = b->src_stride; 871 int pre_stride = x->e_mbd.pre.y_stride; 872 unsigned char *base_pre = x->e_mbd.pre.y_buffer; 873 874 int in_what_stride = pre_stride; 875 int br, bc; 876 int_mv this_mv; 877 unsigned int bestsad; 878 unsigned int thissad; 879 unsigned char *base_offset; 880 unsigned char *this_offset; 881 int k = -1; 882 int all_in; 883 int best_site = -1; 884 int hex_range = 127; 885 int dia_range = 8; 886 887 int_mv fcenter_mv; 888 fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; 889 fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; 890 891 /* adjust ref_mv to make sure it is within MV range */ 892 vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); 893 br = ref_mv->as_mv.row; 894 bc = ref_mv->as_mv.col; 895 896 /* Work out the start point for the search */ 897 base_offset = (unsigned char *)(base_pre + d->offset); 898 this_offset = base_offset + (br * (pre_stride)) + bc; 899 this_mv.as_mv.row = br; 900 this_mv.as_mv.col = bc; 901 bestsad = vfp->sdf(what, what_stride, this_offset, in_what_stride, UINT_MAX) 902 + mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit); 903 904#if CONFIG_MULTI_RES_ENCODING 905 /* Lower search range based on prediction info */ 906 if (search_param >= 6) goto cal_neighbors; 907 else if (search_param >= 5) hex_range = 4; 908 else if (search_param >= 4) hex_range = 6; 909 else if (search_param >= 3) hex_range = 15; 910 else if (search_param >= 2) hex_range = 31; 911 else if (search_param >= 1) hex_range = 63; 912 913 dia_range = 8; 914#endif 915 916 /* hex search */ 917 CHECK_BOUNDS(2) 918 919 if(all_in) 920 { 921 for (i = 0; i < 6; i++) 922 { 923 this_mv.as_mv.row = br + hex[i].row; 924 this_mv.as_mv.col = bc + hex[i].col; 925 this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) + this_mv.as_mv.col; 926 thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad); 927 CHECK_BETTER 928 } 929 }else 930 { 931 for (i = 0; i < 6; i++) 932 { 933 this_mv.as_mv.row = br + hex[i].row; 934 this_mv.as_mv.col = bc + hex[i].col; 935 CHECK_POINT 936 this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) + this_mv.as_mv.col; 937 thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad); 938 CHECK_BETTER 939 } 940 } 941 942 if (best_site == -1) 943 goto cal_neighbors; 944 else 945 { 946 br += hex[best_site].row; 947 bc += hex[best_site].col; 948 k = best_site; 949 } 950 951 for (j = 1; j < hex_range; j++) 952 { 953 best_site = -1; 954 CHECK_BOUNDS(2) 955 956 if(all_in) 957 { 958 for (i = 0; i < 3; i++) 959 { 960 this_mv.as_mv.row = br + next_chkpts[k][i].row; 961 this_mv.as_mv.col = bc + next_chkpts[k][i].col; 962 this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col; 963 thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad); 964 CHECK_BETTER 965 } 966 }else 967 { 968 for (i = 0; i < 3; i++) 969 { 970 this_mv.as_mv.row = br + next_chkpts[k][i].row; 971 this_mv.as_mv.col = bc + next_chkpts[k][i].col; 972 CHECK_POINT 973 this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col; 974 thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad); 975 CHECK_BETTER 976 } 977 } 978 979 if (best_site == -1) 980 break; 981 else 982 { 983 br += next_chkpts[k][best_site].row; 984 bc += next_chkpts[k][best_site].col; 985 k += 5 + best_site; 986 if (k >= 12) k -= 12; 987 else if (k >= 6) k -= 6; 988 } 989 } 990 991 /* check 4 1-away neighbors */ 992cal_neighbors: 993 for (j = 0; j < dia_range; j++) 994 { 995 best_site = -1; 996 CHECK_BOUNDS(1) 997 998 if(all_in) 999 { 1000 for (i = 0; i < 4; i++) 1001 { 1002 this_mv.as_mv.row = br + neighbors[i].row; 1003 this_mv.as_mv.col = bc + neighbors[i].col; 1004 this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col; 1005 thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad); 1006 CHECK_BETTER 1007 } 1008 }else 1009 { 1010 for (i = 0; i < 4; i++) 1011 { 1012 this_mv.as_mv.row = br + neighbors[i].row; 1013 this_mv.as_mv.col = bc + neighbors[i].col; 1014 CHECK_POINT 1015 this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col; 1016 thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad); 1017 CHECK_BETTER 1018 } 1019 } 1020 1021 if (best_site == -1) 1022 break; 1023 else 1024 { 1025 br += neighbors[best_site].row; 1026 bc += neighbors[best_site].col; 1027 } 1028 } 1029 1030 best_mv->as_mv.row = br; 1031 best_mv->as_mv.col = bc; 1032 1033 return bestsad; 1034} 1035#undef CHECK_BOUNDS 1036#undef CHECK_POINT 1037#undef CHECK_BETTER 1038 1039int vp8_diamond_search_sad_c 1040( 1041 MACROBLOCK *x, 1042 BLOCK *b, 1043 BLOCKD *d, 1044 int_mv *ref_mv, 1045 int_mv *best_mv, 1046 int search_param, 1047 int sad_per_bit, 1048 int *num00, 1049 vp8_variance_fn_ptr_t *fn_ptr, 1050 int *mvcost[2], 1051 int_mv *center_mv 1052) 1053{ 1054 int i, j, step; 1055 1056 unsigned char *what = (*(b->base_src) + b->src); 1057 int what_stride = b->src_stride; 1058 unsigned char *in_what; 1059 int pre_stride = x->e_mbd.pre.y_stride; 1060 unsigned char *base_pre = x->e_mbd.pre.y_buffer; 1061 int in_what_stride = pre_stride; 1062 unsigned char *best_address; 1063 1064 int tot_steps; 1065 int_mv this_mv; 1066 1067 unsigned int bestsad; 1068 unsigned int thissad; 1069 int best_site = 0; 1070 int last_site = 0; 1071 1072 int ref_row; 1073 int ref_col; 1074 int this_row_offset; 1075 int this_col_offset; 1076 search_site *ss; 1077 1078 unsigned char *check_here; 1079 1080 int *mvsadcost[2]; 1081 int_mv fcenter_mv; 1082 1083 mvsadcost[0] = x->mvsadcost[0]; 1084 mvsadcost[1] = x->mvsadcost[1]; 1085 fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; 1086 fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; 1087 1088 vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); 1089 ref_row = ref_mv->as_mv.row; 1090 ref_col = ref_mv->as_mv.col; 1091 *num00 = 0; 1092 best_mv->as_mv.row = ref_row; 1093 best_mv->as_mv.col = ref_col; 1094 1095 /* Work out the start point for the search */ 1096 in_what = (unsigned char *)(base_pre + d->offset + (ref_row * pre_stride) + ref_col); 1097 best_address = in_what; 1098 1099 /* Check the starting position */ 1100 bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, UINT_MAX) 1101 + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit); 1102 1103 /* search_param determines the length of the initial step and hence 1104 * the number of iterations 0 = initial step (MAX_FIRST_STEP) pel : 1105 * 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc. 1106 */ 1107 ss = &x->ss[search_param * x->searches_per_step]; 1108 tot_steps = (x->ss_count / x->searches_per_step) - search_param; 1109 1110 i = 1; 1111 1112 for (step = 0; step < tot_steps ; step++) 1113 { 1114 for (j = 0 ; j < x->searches_per_step ; j++) 1115 { 1116 /* Trap illegal vectors */ 1117 this_row_offset = best_mv->as_mv.row + ss[i].mv.row; 1118 this_col_offset = best_mv->as_mv.col + ss[i].mv.col; 1119 1120 if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) && 1121 (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max)) 1122 1123 { 1124 check_here = ss[i].offset + best_address; 1125 thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad); 1126 1127 if (thissad < bestsad) 1128 { 1129 this_mv.as_mv.row = this_row_offset; 1130 this_mv.as_mv.col = this_col_offset; 1131 thissad += mvsad_err_cost(&this_mv, &fcenter_mv, 1132 mvsadcost, sad_per_bit); 1133 1134 if (thissad < bestsad) 1135 { 1136 bestsad = thissad; 1137 best_site = i; 1138 } 1139 } 1140 } 1141 1142 i++; 1143 } 1144 1145 if (best_site != last_site) 1146 { 1147 best_mv->as_mv.row += ss[best_site].mv.row; 1148 best_mv->as_mv.col += ss[best_site].mv.col; 1149 best_address += ss[best_site].offset; 1150 last_site = best_site; 1151 } 1152 else if (best_address == in_what) 1153 (*num00)++; 1154 } 1155 1156 this_mv.as_mv.row = best_mv->as_mv.row << 3; 1157 this_mv.as_mv.col = best_mv->as_mv.col << 3; 1158 1159 return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) 1160 + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); 1161} 1162 1163int vp8_diamond_search_sadx4 1164( 1165 MACROBLOCK *x, 1166 BLOCK *b, 1167 BLOCKD *d, 1168 int_mv *ref_mv, 1169 int_mv *best_mv, 1170 int search_param, 1171 int sad_per_bit, 1172 int *num00, 1173 vp8_variance_fn_ptr_t *fn_ptr, 1174 int *mvcost[2], 1175 int_mv *center_mv 1176) 1177{ 1178 int i, j, step; 1179 1180 unsigned char *what = (*(b->base_src) + b->src); 1181 int what_stride = b->src_stride; 1182 unsigned char *in_what; 1183 int pre_stride = x->e_mbd.pre.y_stride; 1184 unsigned char *base_pre = x->e_mbd.pre.y_buffer; 1185 int in_what_stride = pre_stride; 1186 unsigned char *best_address; 1187 1188 int tot_steps; 1189 int_mv this_mv; 1190 1191 unsigned int bestsad; 1192 unsigned int thissad; 1193 int best_site = 0; 1194 int last_site = 0; 1195 1196 int ref_row; 1197 int ref_col; 1198 int this_row_offset; 1199 int this_col_offset; 1200 search_site *ss; 1201 1202 unsigned char *check_here; 1203 1204 int *mvsadcost[2]; 1205 int_mv fcenter_mv; 1206 1207 mvsadcost[0] = x->mvsadcost[0]; 1208 mvsadcost[1] = x->mvsadcost[1]; 1209 fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; 1210 fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; 1211 1212 vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); 1213 ref_row = ref_mv->as_mv.row; 1214 ref_col = ref_mv->as_mv.col; 1215 *num00 = 0; 1216 best_mv->as_mv.row = ref_row; 1217 best_mv->as_mv.col = ref_col; 1218 1219 /* Work out the start point for the search */ 1220 in_what = (unsigned char *)(base_pre + d->offset + (ref_row * pre_stride) + ref_col); 1221 best_address = in_what; 1222 1223 /* Check the starting position */ 1224 bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, UINT_MAX) 1225 + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit); 1226 1227 /* search_param determines the length of the initial step and hence the 1228 * number of iterations 0 = initial step (MAX_FIRST_STEP) pel : 1 = 1229 * (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc. 1230 */ 1231 ss = &x->ss[search_param * x->searches_per_step]; 1232 tot_steps = (x->ss_count / x->searches_per_step) - search_param; 1233 1234 i = 1; 1235 1236 for (step = 0; step < tot_steps ; step++) 1237 { 1238 int all_in = 1, t; 1239 1240 /* To know if all neighbor points are within the bounds, 4 bounds 1241 * checking are enough instead of checking 4 bounds for each 1242 * points. 1243 */ 1244 all_in &= ((best_mv->as_mv.row + ss[i].mv.row)> x->mv_row_min); 1245 all_in &= ((best_mv->as_mv.row + ss[i+1].mv.row) < x->mv_row_max); 1246 all_in &= ((best_mv->as_mv.col + ss[i+2].mv.col) > x->mv_col_min); 1247 all_in &= ((best_mv->as_mv.col + ss[i+3].mv.col) < x->mv_col_max); 1248 1249 if (all_in) 1250 { 1251 unsigned int sad_array[4]; 1252 1253 for (j = 0 ; j < x->searches_per_step ; j += 4) 1254 { 1255 const unsigned char *block_offset[4]; 1256 1257 for (t = 0; t < 4; t++) 1258 block_offset[t] = ss[i+t].offset + best_address; 1259 1260 fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, sad_array); 1261 1262 for (t = 0; t < 4; t++, i++) 1263 { 1264 if (sad_array[t] < bestsad) 1265 { 1266 this_mv.as_mv.row = best_mv->as_mv.row + ss[i].mv.row; 1267 this_mv.as_mv.col = best_mv->as_mv.col + ss[i].mv.col; 1268 sad_array[t] += mvsad_err_cost(&this_mv, &fcenter_mv, 1269 mvsadcost, sad_per_bit); 1270 1271 if (sad_array[t] < bestsad) 1272 { 1273 bestsad = sad_array[t]; 1274 best_site = i; 1275 } 1276 } 1277 } 1278 } 1279 } 1280 else 1281 { 1282 for (j = 0 ; j < x->searches_per_step ; j++) 1283 { 1284 /* Trap illegal vectors */ 1285 this_row_offset = best_mv->as_mv.row + ss[i].mv.row; 1286 this_col_offset = best_mv->as_mv.col + ss[i].mv.col; 1287 1288 if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) && 1289 (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max)) 1290 { 1291 check_here = ss[i].offset + best_address; 1292 thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad); 1293 1294 if (thissad < bestsad) 1295 { 1296 this_mv.as_mv.row = this_row_offset; 1297 this_mv.as_mv.col = this_col_offset; 1298 thissad += mvsad_err_cost(&this_mv, &fcenter_mv, 1299 mvsadcost, sad_per_bit); 1300 1301 if (thissad < bestsad) 1302 { 1303 bestsad = thissad; 1304 best_site = i; 1305 } 1306 } 1307 } 1308 i++; 1309 } 1310 } 1311 1312 if (best_site != last_site) 1313 { 1314 best_mv->as_mv.row += ss[best_site].mv.row; 1315 best_mv->as_mv.col += ss[best_site].mv.col; 1316 best_address += ss[best_site].offset; 1317 last_site = best_site; 1318 } 1319 else if (best_address == in_what) 1320 (*num00)++; 1321 } 1322 1323 this_mv.as_mv.row = best_mv->as_mv.row * 8; 1324 this_mv.as_mv.col = best_mv->as_mv.col * 8; 1325 1326 return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) 1327 + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); 1328} 1329 1330int vp8_full_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, 1331 int sad_per_bit, int distance, 1332 vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], 1333 int_mv *center_mv) 1334{ 1335 unsigned char *what = (*(b->base_src) + b->src); 1336 int what_stride = b->src_stride; 1337 unsigned char *in_what; 1338 int pre_stride = x->e_mbd.pre.y_stride; 1339 unsigned char *base_pre = x->e_mbd.pre.y_buffer; 1340 int in_what_stride = pre_stride; 1341 int mv_stride = pre_stride; 1342 unsigned char *bestaddress; 1343 int_mv *best_mv = &d->bmi.mv; 1344 int_mv this_mv; 1345 unsigned int bestsad; 1346 unsigned int thissad; 1347 int r, c; 1348 1349 unsigned char *check_here; 1350 1351 int ref_row = ref_mv->as_mv.row; 1352 int ref_col = ref_mv->as_mv.col; 1353 1354 int row_min = ref_row - distance; 1355 int row_max = ref_row + distance; 1356 int col_min = ref_col - distance; 1357 int col_max = ref_col + distance; 1358 1359 int *mvsadcost[2]; 1360 int_mv fcenter_mv; 1361 1362 mvsadcost[0] = x->mvsadcost[0]; 1363 mvsadcost[1] = x->mvsadcost[1]; 1364 fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; 1365 fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; 1366 1367 /* Work out the mid point for the search */ 1368 in_what = base_pre + d->offset; 1369 bestaddress = in_what + (ref_row * pre_stride) + ref_col; 1370 1371 best_mv->as_mv.row = ref_row; 1372 best_mv->as_mv.col = ref_col; 1373 1374 /* Baseline value at the centre */ 1375 bestsad = fn_ptr->sdf(what, what_stride, bestaddress, 1376 in_what_stride, UINT_MAX) 1377 + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit); 1378 1379 /* Apply further limits to prevent us looking using vectors that 1380 * stretch beyiond the UMV border 1381 */ 1382 if (col_min < x->mv_col_min) 1383 col_min = x->mv_col_min; 1384 1385 if (col_max > x->mv_col_max) 1386 col_max = x->mv_col_max; 1387 1388 if (row_min < x->mv_row_min) 1389 row_min = x->mv_row_min; 1390 1391 if (row_max > x->mv_row_max) 1392 row_max = x->mv_row_max; 1393 1394 for (r = row_min; r < row_max ; r++) 1395 { 1396 this_mv.as_mv.row = r; 1397 check_here = r * mv_stride + in_what + col_min; 1398 1399 for (c = col_min; c < col_max; c++) 1400 { 1401 thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad); 1402 1403 this_mv.as_mv.col = c; 1404 thissad += mvsad_err_cost(&this_mv, &fcenter_mv, 1405 mvsadcost, sad_per_bit); 1406 1407 if (thissad < bestsad) 1408 { 1409 bestsad = thissad; 1410 best_mv->as_mv.row = r; 1411 best_mv->as_mv.col = c; 1412 bestaddress = check_here; 1413 } 1414 1415 check_here++; 1416 } 1417 } 1418 1419 this_mv.as_mv.row = best_mv->as_mv.row << 3; 1420 this_mv.as_mv.col = best_mv->as_mv.col << 3; 1421 1422 return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad) 1423 + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); 1424} 1425 1426int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, 1427 int sad_per_bit, int distance, 1428 vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], 1429 int_mv *center_mv) 1430{ 1431 unsigned char *what = (*(b->base_src) + b->src); 1432 int what_stride = b->src_stride; 1433 unsigned char *in_what; 1434 int pre_stride = x->e_mbd.pre.y_stride; 1435 unsigned char *base_pre = x->e_mbd.pre.y_buffer; 1436 int in_what_stride = pre_stride; 1437 int mv_stride = pre_stride; 1438 unsigned char *bestaddress; 1439 int_mv *best_mv = &d->bmi.mv; 1440 int_mv this_mv; 1441 unsigned int bestsad; 1442 unsigned int thissad; 1443 int r, c; 1444 1445 unsigned char *check_here; 1446 1447 int ref_row = ref_mv->as_mv.row; 1448 int ref_col = ref_mv->as_mv.col; 1449 1450 int row_min = ref_row - distance; 1451 int row_max = ref_row + distance; 1452 int col_min = ref_col - distance; 1453 int col_max = ref_col + distance; 1454 1455 unsigned int sad_array[3]; 1456 1457 int *mvsadcost[2]; 1458 int_mv fcenter_mv; 1459 1460 mvsadcost[0] = x->mvsadcost[0]; 1461 mvsadcost[1] = x->mvsadcost[1]; 1462 fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; 1463 fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; 1464 1465 /* Work out the mid point for the search */ 1466 in_what = base_pre + d->offset; 1467 bestaddress = in_what + (ref_row * pre_stride) + ref_col; 1468 1469 best_mv->as_mv.row = ref_row; 1470 best_mv->as_mv.col = ref_col; 1471 1472 /* Baseline value at the centre */ 1473 bestsad = fn_ptr->sdf(what, what_stride, bestaddress, 1474 in_what_stride, UINT_MAX) 1475 + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit); 1476 1477 /* Apply further limits to prevent us looking using vectors that stretch 1478 * beyond the UMV border 1479 */ 1480 if (col_min < x->mv_col_min) 1481 col_min = x->mv_col_min; 1482 1483 if (col_max > x->mv_col_max) 1484 col_max = x->mv_col_max; 1485 1486 if (row_min < x->mv_row_min) 1487 row_min = x->mv_row_min; 1488 1489 if (row_max > x->mv_row_max) 1490 row_max = x->mv_row_max; 1491 1492 for (r = row_min; r < row_max ; r++) 1493 { 1494 this_mv.as_mv.row = r; 1495 check_here = r * mv_stride + in_what + col_min; 1496 c = col_min; 1497 1498 while ((c + 2) < col_max) 1499 { 1500 int i; 1501 1502 fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array); 1503 1504 for (i = 0; i < 3; i++) 1505 { 1506 thissad = sad_array[i]; 1507 1508 if (thissad < bestsad) 1509 { 1510 this_mv.as_mv.col = c; 1511 thissad += mvsad_err_cost(&this_mv, &fcenter_mv, 1512 mvsadcost, sad_per_bit); 1513 1514 if (thissad < bestsad) 1515 { 1516 bestsad = thissad; 1517 best_mv->as_mv.row = r; 1518 best_mv->as_mv.col = c; 1519 bestaddress = check_here; 1520 } 1521 } 1522 1523 check_here++; 1524 c++; 1525 } 1526 } 1527 1528 while (c < col_max) 1529 { 1530 thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad); 1531 1532 if (thissad < bestsad) 1533 { 1534 this_mv.as_mv.col = c; 1535 thissad += mvsad_err_cost(&this_mv, &fcenter_mv, 1536 mvsadcost, sad_per_bit); 1537 1538 if (thissad < bestsad) 1539 { 1540 bestsad = thissad; 1541 best_mv->as_mv.row = r; 1542 best_mv->as_mv.col = c; 1543 bestaddress = check_here; 1544 } 1545 } 1546 1547 check_here ++; 1548 c ++; 1549 } 1550 1551 } 1552 1553 this_mv.as_mv.row = best_mv->as_mv.row << 3; 1554 this_mv.as_mv.col = best_mv->as_mv.col << 3; 1555 1556 return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad) 1557 + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); 1558} 1559 1560int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, 1561 int sad_per_bit, int distance, 1562 vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], 1563 int_mv *center_mv) 1564{ 1565 unsigned char *what = (*(b->base_src) + b->src); 1566 int what_stride = b->src_stride; 1567 int pre_stride = x->e_mbd.pre.y_stride; 1568 unsigned char *base_pre = x->e_mbd.pre.y_buffer; 1569 unsigned char *in_what; 1570 int in_what_stride = pre_stride; 1571 int mv_stride = pre_stride; 1572 unsigned char *bestaddress; 1573 int_mv *best_mv = &d->bmi.mv; 1574 int_mv this_mv; 1575 unsigned int bestsad; 1576 unsigned int thissad; 1577 int r, c; 1578 1579 unsigned char *check_here; 1580 1581 int ref_row = ref_mv->as_mv.row; 1582 int ref_col = ref_mv->as_mv.col; 1583 1584 int row_min = ref_row - distance; 1585 int row_max = ref_row + distance; 1586 int col_min = ref_col - distance; 1587 int col_max = ref_col + distance; 1588 1589 DECLARE_ALIGNED_ARRAY(16, unsigned short, sad_array8, 8); 1590 unsigned int sad_array[3]; 1591 1592 int *mvsadcost[2]; 1593 int_mv fcenter_mv; 1594 1595 mvsadcost[0] = x->mvsadcost[0]; 1596 mvsadcost[1] = x->mvsadcost[1]; 1597 fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; 1598 fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; 1599 1600 /* Work out the mid point for the search */ 1601 in_what = base_pre + d->offset; 1602 bestaddress = in_what + (ref_row * pre_stride) + ref_col; 1603 1604 best_mv->as_mv.row = ref_row; 1605 best_mv->as_mv.col = ref_col; 1606 1607 /* Baseline value at the centre */ 1608 bestsad = fn_ptr->sdf(what, what_stride, 1609 bestaddress, in_what_stride, UINT_MAX) 1610 + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit); 1611 1612 /* Apply further limits to prevent us looking using vectors that stretch 1613 * beyond the UMV border 1614 */ 1615 if (col_min < x->mv_col_min) 1616 col_min = x->mv_col_min; 1617 1618 if (col_max > x->mv_col_max) 1619 col_max = x->mv_col_max; 1620 1621 if (row_min < x->mv_row_min) 1622 row_min = x->mv_row_min; 1623 1624 if (row_max > x->mv_row_max) 1625 row_max = x->mv_row_max; 1626 1627 for (r = row_min; r < row_max ; r++) 1628 { 1629 this_mv.as_mv.row = r; 1630 check_here = r * mv_stride + in_what + col_min; 1631 c = col_min; 1632 1633 while ((c + 7) < col_max) 1634 { 1635 int i; 1636 1637 fn_ptr->sdx8f(what, what_stride, check_here, in_what_stride, sad_array8); 1638 1639 for (i = 0; i < 8; i++) 1640 { 1641 thissad = sad_array8[i]; 1642 1643 if (thissad < bestsad) 1644 { 1645 this_mv.as_mv.col = c; 1646 thissad += mvsad_err_cost(&this_mv, &fcenter_mv, 1647 mvsadcost, sad_per_bit); 1648 1649 if (thissad < bestsad) 1650 { 1651 bestsad = thissad; 1652 best_mv->as_mv.row = r; 1653 best_mv->as_mv.col = c; 1654 bestaddress = check_here; 1655 } 1656 } 1657 1658 check_here++; 1659 c++; 1660 } 1661 } 1662 1663 while ((c + 2) < col_max) 1664 { 1665 int i; 1666 1667 fn_ptr->sdx3f(what, what_stride, check_here , in_what_stride, sad_array); 1668 1669 for (i = 0; i < 3; i++) 1670 { 1671 thissad = sad_array[i]; 1672 1673 if (thissad < bestsad) 1674 { 1675 this_mv.as_mv.col = c; 1676 thissad += mvsad_err_cost(&this_mv, &fcenter_mv, 1677 mvsadcost, sad_per_bit); 1678 1679 if (thissad < bestsad) 1680 { 1681 bestsad = thissad; 1682 best_mv->as_mv.row = r; 1683 best_mv->as_mv.col = c; 1684 bestaddress = check_here; 1685 } 1686 } 1687 1688 check_here++; 1689 c++; 1690 } 1691 } 1692 1693 while (c < col_max) 1694 { 1695 thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad); 1696 1697 if (thissad < bestsad) 1698 { 1699 this_mv.as_mv.col = c; 1700 thissad += mvsad_err_cost(&this_mv, &fcenter_mv, 1701 mvsadcost, sad_per_bit); 1702 1703 if (thissad < bestsad) 1704 { 1705 bestsad = thissad; 1706 best_mv->as_mv.row = r; 1707 best_mv->as_mv.col = c; 1708 bestaddress = check_here; 1709 } 1710 } 1711 1712 check_here ++; 1713 c ++; 1714 } 1715 } 1716 1717 this_mv.as_mv.row = best_mv->as_mv.row * 8; 1718 this_mv.as_mv.col = best_mv->as_mv.col * 8; 1719 1720 return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad) 1721 + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); 1722} 1723 1724int vp8_refining_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, 1725 int error_per_bit, int search_range, 1726 vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], 1727 int_mv *center_mv) 1728{ 1729 MV neighbors[4] = {{-1, 0}, {0, -1}, {0, 1}, {1, 0}}; 1730 int i, j; 1731 short this_row_offset, this_col_offset; 1732 1733 int what_stride = b->src_stride; 1734 int pre_stride = x->e_mbd.pre.y_stride; 1735 unsigned char *base_pre = x->e_mbd.pre.y_buffer; 1736 int in_what_stride = pre_stride; 1737 unsigned char *what = (*(b->base_src) + b->src); 1738 unsigned char *best_address = (unsigned char *)(base_pre + d->offset + 1739 (ref_mv->as_mv.row * pre_stride) + ref_mv->as_mv.col); 1740 unsigned char *check_here; 1741 int_mv this_mv; 1742 unsigned int bestsad; 1743 unsigned int thissad; 1744 1745 int *mvsadcost[2]; 1746 int_mv fcenter_mv; 1747 1748 mvsadcost[0] = x->mvsadcost[0]; 1749 mvsadcost[1] = x->mvsadcost[1]; 1750 fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; 1751 fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; 1752 1753 bestsad = fn_ptr->sdf(what, what_stride, best_address, 1754 in_what_stride, UINT_MAX) 1755 + mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit); 1756 1757 for (i=0; i<search_range; i++) 1758 { 1759 int best_site = -1; 1760 1761 for (j = 0 ; j < 4 ; j++) 1762 { 1763 this_row_offset = ref_mv->as_mv.row + neighbors[j].row; 1764 this_col_offset = ref_mv->as_mv.col + neighbors[j].col; 1765 1766 if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) && 1767 (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max)) 1768 { 1769 check_here = (neighbors[j].row)*in_what_stride + neighbors[j].col + best_address; 1770 thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad); 1771 1772 if (thissad < bestsad) 1773 { 1774 this_mv.as_mv.row = this_row_offset; 1775 this_mv.as_mv.col = this_col_offset; 1776 thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit); 1777 1778 if (thissad < bestsad) 1779 { 1780 bestsad = thissad; 1781 best_site = j; 1782 } 1783 } 1784 } 1785 } 1786 1787 if (best_site == -1) 1788 break; 1789 else 1790 { 1791 ref_mv->as_mv.row += neighbors[best_site].row; 1792 ref_mv->as_mv.col += neighbors[best_site].col; 1793 best_address += (neighbors[best_site].row)*in_what_stride + neighbors[best_site].col; 1794 } 1795 } 1796 1797 this_mv.as_mv.row = ref_mv->as_mv.row << 3; 1798 this_mv.as_mv.col = ref_mv->as_mv.col << 3; 1799 1800 return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) 1801 + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); 1802} 1803 1804int vp8_refining_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d, 1805 int_mv *ref_mv, int error_per_bit, 1806 int search_range, vp8_variance_fn_ptr_t *fn_ptr, 1807 int *mvcost[2], int_mv *center_mv) 1808{ 1809 MV neighbors[4] = {{-1, 0}, {0, -1}, {0, 1}, {1, 0}}; 1810 int i, j; 1811 short this_row_offset, this_col_offset; 1812 1813 int what_stride = b->src_stride; 1814 int pre_stride = x->e_mbd.pre.y_stride; 1815 unsigned char *base_pre = x->e_mbd.pre.y_buffer; 1816 int in_what_stride = pre_stride; 1817 unsigned char *what = (*(b->base_src) + b->src); 1818 unsigned char *best_address = (unsigned char *)(base_pre + d->offset + 1819 (ref_mv->as_mv.row * pre_stride) + ref_mv->as_mv.col); 1820 unsigned char *check_here; 1821 int_mv this_mv; 1822 unsigned int bestsad; 1823 unsigned int thissad; 1824 1825 int *mvsadcost[2]; 1826 int_mv fcenter_mv; 1827 1828 mvsadcost[0] = x->mvsadcost[0]; 1829 mvsadcost[1] = x->mvsadcost[1]; 1830 fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; 1831 fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; 1832 1833 bestsad = fn_ptr->sdf(what, what_stride, best_address, 1834 in_what_stride, UINT_MAX) 1835 + mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit); 1836 1837 for (i=0; i<search_range; i++) 1838 { 1839 int best_site = -1; 1840 int all_in = 1; 1841 1842 all_in &= ((ref_mv->as_mv.row - 1) > x->mv_row_min); 1843 all_in &= ((ref_mv->as_mv.row + 1) < x->mv_row_max); 1844 all_in &= ((ref_mv->as_mv.col - 1) > x->mv_col_min); 1845 all_in &= ((ref_mv->as_mv.col + 1) < x->mv_col_max); 1846 1847 if(all_in) 1848 { 1849 unsigned int sad_array[4]; 1850 const unsigned char *block_offset[4]; 1851 block_offset[0] = best_address - in_what_stride; 1852 block_offset[1] = best_address - 1; 1853 block_offset[2] = best_address + 1; 1854 block_offset[3] = best_address + in_what_stride; 1855 1856 fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, sad_array); 1857 1858 for (j = 0; j < 4; j++) 1859 { 1860 if (sad_array[j] < bestsad) 1861 { 1862 this_mv.as_mv.row = ref_mv->as_mv.row + neighbors[j].row; 1863 this_mv.as_mv.col = ref_mv->as_mv.col + neighbors[j].col; 1864 sad_array[j] += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit); 1865 1866 if (sad_array[j] < bestsad) 1867 { 1868 bestsad = sad_array[j]; 1869 best_site = j; 1870 } 1871 } 1872 } 1873 } 1874 else 1875 { 1876 for (j = 0 ; j < 4 ; j++) 1877 { 1878 this_row_offset = ref_mv->as_mv.row + neighbors[j].row; 1879 this_col_offset = ref_mv->as_mv.col + neighbors[j].col; 1880 1881 if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) && 1882 (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max)) 1883 { 1884 check_here = (neighbors[j].row)*in_what_stride + neighbors[j].col + best_address; 1885 thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad); 1886 1887 if (thissad < bestsad) 1888 { 1889 this_mv.as_mv.row = this_row_offset; 1890 this_mv.as_mv.col = this_col_offset; 1891 thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit); 1892 1893 if (thissad < bestsad) 1894 { 1895 bestsad = thissad; 1896 best_site = j; 1897 } 1898 } 1899 } 1900 } 1901 } 1902 1903 if (best_site == -1) 1904 break; 1905 else 1906 { 1907 ref_mv->as_mv.row += neighbors[best_site].row; 1908 ref_mv->as_mv.col += neighbors[best_site].col; 1909 best_address += (neighbors[best_site].row)*in_what_stride + neighbors[best_site].col; 1910 } 1911 } 1912 1913 this_mv.as_mv.row = ref_mv->as_mv.row * 8; 1914 this_mv.as_mv.col = ref_mv->as_mv.col * 8; 1915 1916 return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) 1917 + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); 1918} 1919 1920#ifdef VP8_ENTROPY_STATS 1921void print_mode_context(void) 1922{ 1923 FILE *f = fopen("modecont.c", "w"); 1924 int i, j; 1925 1926 fprintf(f, "#include \"entropy.h\"\n"); 1927 fprintf(f, "const int vp8_mode_contexts[6][4] =\n"); 1928 fprintf(f, "{\n"); 1929 1930 for (j = 0; j < 6; j++) 1931 { 1932 fprintf(f, " { /* %d */\n", j); 1933 fprintf(f, " "); 1934 1935 for (i = 0; i < 4; i++) 1936 { 1937 int overal_prob; 1938 int this_prob; 1939 int count; 1940 1941 /* Overall probs */ 1942 count = mv_mode_cts[i][0] + mv_mode_cts[i][1]; 1943 1944 if (count) 1945 overal_prob = 256 * mv_mode_cts[i][0] / count; 1946 else 1947 overal_prob = 128; 1948 1949 if (overal_prob == 0) 1950 overal_prob = 1; 1951 1952 /* context probs */ 1953 count = mv_ref_ct[j][i][0] + mv_ref_ct[j][i][1]; 1954 1955 if (count) 1956 this_prob = 256 * mv_ref_ct[j][i][0] / count; 1957 else 1958 this_prob = 128; 1959 1960 if (this_prob == 0) 1961 this_prob = 1; 1962 1963 fprintf(f, "%5d, ", this_prob); 1964 } 1965 1966 fprintf(f, " },\n"); 1967 } 1968 1969 fprintf(f, "};\n"); 1970 fclose(f); 1971} 1972 1973/* MV ref count VP8_ENTROPY_STATS stats code */ 1974#ifdef VP8_ENTROPY_STATS 1975void init_mv_ref_counts() 1976{ 1977 vpx_memset(mv_ref_ct, 0, sizeof(mv_ref_ct)); 1978 vpx_memset(mv_mode_cts, 0, sizeof(mv_mode_cts)); 1979} 1980 1981void accum_mv_refs(MB_PREDICTION_MODE m, const int ct[4]) 1982{ 1983 if (m == ZEROMV) 1984 { 1985 ++mv_ref_ct [ct[0]] [0] [0]; 1986 ++mv_mode_cts[0][0]; 1987 } 1988 else 1989 { 1990 ++mv_ref_ct [ct[0]] [0] [1]; 1991 ++mv_mode_cts[0][1]; 1992 1993 if (m == NEARESTMV) 1994 { 1995 ++mv_ref_ct [ct[1]] [1] [0]; 1996 ++mv_mode_cts[1][0]; 1997 } 1998 else 1999 { 2000 ++mv_ref_ct [ct[1]] [1] [1]; 2001 ++mv_mode_cts[1][1]; 2002 2003 if (m == NEARMV) 2004 { 2005 ++mv_ref_ct [ct[2]] [2] [0]; 2006 ++mv_mode_cts[2][0]; 2007 } 2008 else 2009 { 2010 ++mv_ref_ct [ct[2]] [2] [1]; 2011 ++mv_mode_cts[2][1]; 2012 2013 if (m == NEWMV) 2014 { 2015 ++mv_ref_ct [ct[3]] [3] [0]; 2016 ++mv_mode_cts[3][0]; 2017 } 2018 else 2019 { 2020 ++mv_ref_ct [ct[3]] [3] [1]; 2021 ++mv_mode_cts[3][1]; 2022 } 2023 } 2024 } 2025 } 2026} 2027 2028#endif/* END MV ref count VP8_ENTROPY_STATS stats code */ 2029 2030#endif 2031