129a84457aed4c45bc900998b5e11c03023264208James Dong/* ------------------------------------------------------------------ 229a84457aed4c45bc900998b5e11c03023264208James Dong * Copyright (C) 1998-2009 PacketVideo 329a84457aed4c45bc900998b5e11c03023264208James Dong * 429a84457aed4c45bc900998b5e11c03023264208James Dong * Licensed under the Apache License, Version 2.0 (the "License"); 529a84457aed4c45bc900998b5e11c03023264208James Dong * you may not use this file except in compliance with the License. 629a84457aed4c45bc900998b5e11c03023264208James Dong * You may obtain a copy of the License at 729a84457aed4c45bc900998b5e11c03023264208James Dong * 829a84457aed4c45bc900998b5e11c03023264208James Dong * http://www.apache.org/licenses/LICENSE-2.0 929a84457aed4c45bc900998b5e11c03023264208James Dong * 1029a84457aed4c45bc900998b5e11c03023264208James Dong * Unless required by applicable law or agreed to in writing, software 1129a84457aed4c45bc900998b5e11c03023264208James Dong * distributed under the License is distributed on an "AS IS" BASIS, 1229a84457aed4c45bc900998b5e11c03023264208James Dong * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 1329a84457aed4c45bc900998b5e11c03023264208James Dong * express or implied. 1429a84457aed4c45bc900998b5e11c03023264208James Dong * See the License for the specific language governing permissions 1529a84457aed4c45bc900998b5e11c03023264208James Dong * and limitations under the License. 1629a84457aed4c45bc900998b5e11c03023264208James Dong * ------------------------------------------------------------------- 1729a84457aed4c45bc900998b5e11c03023264208James Dong */ 1829a84457aed4c45bc900998b5e11c03023264208James Dong#include "avcenc_lib.h" 1929a84457aed4c45bc900998b5e11c03023264208James Dong/* 3/29/01 fast half-pel search based on neighboring guess */ 2029a84457aed4c45bc900998b5e11c03023264208James Dong/* value ranging from 0 to 4, high complexity (more accurate) to 2129a84457aed4c45bc900998b5e11c03023264208James Dong low complexity (less accurate) */ 2229a84457aed4c45bc900998b5e11c03023264208James Dong#define HP_DISTANCE_TH 5 // 2 /* half-pel distance threshold */ 2329a84457aed4c45bc900998b5e11c03023264208James Dong 2429a84457aed4c45bc900998b5e11c03023264208James Dong#define PREF_16_VEC 129 /* 1MV bias versus 4MVs*/ 2529a84457aed4c45bc900998b5e11c03023264208James Dong 2629a84457aed4c45bc900998b5e11c03023264208James Dongconst static int distance_tab[9][9] = /* [hp_guess][k] */ 2729a84457aed4c45bc900998b5e11c03023264208James Dong{ 2829a84457aed4c45bc900998b5e11c03023264208James Dong {0, 1, 1, 1, 1, 1, 1, 1, 1}, 2929a84457aed4c45bc900998b5e11c03023264208James Dong {1, 0, 1, 2, 3, 4, 3, 2, 1}, 3029a84457aed4c45bc900998b5e11c03023264208James Dong {1, 0, 0, 0, 1, 2, 3, 2, 1}, 3129a84457aed4c45bc900998b5e11c03023264208James Dong {1, 2, 1, 0, 1, 2, 3, 4, 3}, 3229a84457aed4c45bc900998b5e11c03023264208James Dong {1, 2, 1, 0, 0, 0, 1, 2, 3}, 3329a84457aed4c45bc900998b5e11c03023264208James Dong {1, 4, 3, 2, 1, 0, 1, 2, 3}, 3429a84457aed4c45bc900998b5e11c03023264208James Dong {1, 2, 3, 2, 1, 0, 0, 0, 1}, 3529a84457aed4c45bc900998b5e11c03023264208James Dong {1, 2, 3, 4, 3, 2, 1, 0, 1}, 3629a84457aed4c45bc900998b5e11c03023264208James Dong {1, 0, 1, 2, 3, 2, 1, 0, 0} 3729a84457aed4c45bc900998b5e11c03023264208James Dong}; 3829a84457aed4c45bc900998b5e11c03023264208James Dong 3929a84457aed4c45bc900998b5e11c03023264208James Dong#define CLIP_RESULT(x) if((uint)x > 0xFF){ \ 4029a84457aed4c45bc900998b5e11c03023264208James Dong x = 0xFF & (~(x>>31));} 4129a84457aed4c45bc900998b5e11c03023264208James Dong 4229a84457aed4c45bc900998b5e11c03023264208James Dong#define CLIP_UPPER16(x) if((uint)x >= 0x20000000){ \ 4329a84457aed4c45bc900998b5e11c03023264208James Dong x = 0xFF0000 & (~(x>>31));} \ 4429a84457aed4c45bc900998b5e11c03023264208James Dong else { \ 4529a84457aed4c45bc900998b5e11c03023264208James Dong x = (x>>5)&0xFF0000; \ 4629a84457aed4c45bc900998b5e11c03023264208James Dong } 4729a84457aed4c45bc900998b5e11c03023264208James Dong 4829a84457aed4c45bc900998b5e11c03023264208James Dong/*===================================================================== 4929a84457aed4c45bc900998b5e11c03023264208James Dong Function: AVCFindHalfPelMB 5029a84457aed4c45bc900998b5e11c03023264208James Dong Date: 10/31/2007 5129a84457aed4c45bc900998b5e11c03023264208James Dong Purpose: Find half pel resolution MV surrounding the full-pel MV 5229a84457aed4c45bc900998b5e11c03023264208James Dong=====================================================================*/ 5329a84457aed4c45bc900998b5e11c03023264208James Dong 5429a84457aed4c45bc900998b5e11c03023264208James Dongint AVCFindHalfPelMB(AVCEncObject *encvid, uint8 *cur, AVCMV *mot, uint8 *ncand, 5529a84457aed4c45bc900998b5e11c03023264208James Dong int xpos, int ypos, int hp_guess, int cmvx, int cmvy) 5629a84457aed4c45bc900998b5e11c03023264208James Dong{ 5729a84457aed4c45bc900998b5e11c03023264208James Dong AVCPictureData *currPic = encvid->common->currPic; 5829a84457aed4c45bc900998b5e11c03023264208James Dong int lx = currPic->pitch; 5929a84457aed4c45bc900998b5e11c03023264208James Dong int d, dmin, satd_min; 6029a84457aed4c45bc900998b5e11c03023264208James Dong uint8* cand; 6129a84457aed4c45bc900998b5e11c03023264208James Dong int lambda_motion = encvid->lambda_motion; 6229a84457aed4c45bc900998b5e11c03023264208James Dong uint8 *mvbits = encvid->mvbits; 6329a84457aed4c45bc900998b5e11c03023264208James Dong int mvcost; 6429a84457aed4c45bc900998b5e11c03023264208James Dong /* list of candidate to go through for half-pel search*/ 6529a84457aed4c45bc900998b5e11c03023264208James Dong uint8 *subpel_pred = (uint8*) encvid->subpel_pred; // all 16 sub-pel positions 6629a84457aed4c45bc900998b5e11c03023264208James Dong uint8 **hpel_cand = (uint8**) encvid->hpel_cand; /* half-pel position */ 6729a84457aed4c45bc900998b5e11c03023264208James Dong 6829a84457aed4c45bc900998b5e11c03023264208James Dong int xh[9] = {0, 0, 2, 2, 2, 0, -2, -2, -2}; 6929a84457aed4c45bc900998b5e11c03023264208James Dong int yh[9] = {0, -2, -2, 0, 2, 2, 2, 0, -2}; 7029a84457aed4c45bc900998b5e11c03023264208James Dong int xq[8] = {0, 1, 1, 1, 0, -1, -1, -1}; 7129a84457aed4c45bc900998b5e11c03023264208James Dong int yq[8] = { -1, -1, 0, 1, 1, 1, 0, -1}; 7229a84457aed4c45bc900998b5e11c03023264208James Dong int h, hmin, q, qmin; 7329a84457aed4c45bc900998b5e11c03023264208James Dong 7429a84457aed4c45bc900998b5e11c03023264208James Dong OSCL_UNUSED_ARG(xpos); 7529a84457aed4c45bc900998b5e11c03023264208James Dong OSCL_UNUSED_ARG(ypos); 7629a84457aed4c45bc900998b5e11c03023264208James Dong OSCL_UNUSED_ARG(hp_guess); 7729a84457aed4c45bc900998b5e11c03023264208James Dong 7829a84457aed4c45bc900998b5e11c03023264208James Dong GenerateHalfPelPred(subpel_pred, ncand, lx); 7929a84457aed4c45bc900998b5e11c03023264208James Dong 8029a84457aed4c45bc900998b5e11c03023264208James Dong cur = encvid->currYMB; // pre-load current original MB 8129a84457aed4c45bc900998b5e11c03023264208James Dong 8229a84457aed4c45bc900998b5e11c03023264208James Dong cand = hpel_cand[0]; 8329a84457aed4c45bc900998b5e11c03023264208James Dong 8429a84457aed4c45bc900998b5e11c03023264208James Dong // find cost for the current full-pel position 8529a84457aed4c45bc900998b5e11c03023264208James Dong dmin = SATD_MB(cand, cur, 65535); // get Hadamaard transform SAD 8629a84457aed4c45bc900998b5e11c03023264208James Dong mvcost = MV_COST_S(lambda_motion, mot->x, mot->y, cmvx, cmvy); 8729a84457aed4c45bc900998b5e11c03023264208James Dong satd_min = dmin; 8829a84457aed4c45bc900998b5e11c03023264208James Dong dmin += mvcost; 8929a84457aed4c45bc900998b5e11c03023264208James Dong hmin = 0; 9029a84457aed4c45bc900998b5e11c03023264208James Dong 9129a84457aed4c45bc900998b5e11c03023264208James Dong /* find half-pel */ 9229a84457aed4c45bc900998b5e11c03023264208James Dong for (h = 1; h < 9; h++) 9329a84457aed4c45bc900998b5e11c03023264208James Dong { 9429a84457aed4c45bc900998b5e11c03023264208James Dong d = SATD_MB(hpel_cand[h], cur, dmin); 9529a84457aed4c45bc900998b5e11c03023264208James Dong mvcost = MV_COST_S(lambda_motion, mot->x + xh[h], mot->y + yh[h], cmvx, cmvy); 9629a84457aed4c45bc900998b5e11c03023264208James Dong d += mvcost; 9729a84457aed4c45bc900998b5e11c03023264208James Dong 9829a84457aed4c45bc900998b5e11c03023264208James Dong if (d < dmin) 9929a84457aed4c45bc900998b5e11c03023264208James Dong { 10029a84457aed4c45bc900998b5e11c03023264208James Dong dmin = d; 10129a84457aed4c45bc900998b5e11c03023264208James Dong hmin = h; 10229a84457aed4c45bc900998b5e11c03023264208James Dong satd_min = d - mvcost; 10329a84457aed4c45bc900998b5e11c03023264208James Dong } 10429a84457aed4c45bc900998b5e11c03023264208James Dong } 10529a84457aed4c45bc900998b5e11c03023264208James Dong 10629a84457aed4c45bc900998b5e11c03023264208James Dong mot->sad = dmin; 10729a84457aed4c45bc900998b5e11c03023264208James Dong mot->x += xh[hmin]; 10829a84457aed4c45bc900998b5e11c03023264208James Dong mot->y += yh[hmin]; 10929a84457aed4c45bc900998b5e11c03023264208James Dong encvid->best_hpel_pos = hmin; 11029a84457aed4c45bc900998b5e11c03023264208James Dong 11129a84457aed4c45bc900998b5e11c03023264208James Dong /*** search for quarter-pel ****/ 11229a84457aed4c45bc900998b5e11c03023264208James Dong GenerateQuartPelPred(encvid->bilin_base[hmin], &(encvid->qpel_cand[0][0]), hmin); 11329a84457aed4c45bc900998b5e11c03023264208James Dong 11429a84457aed4c45bc900998b5e11c03023264208James Dong encvid->best_qpel_pos = qmin = -1; 11529a84457aed4c45bc900998b5e11c03023264208James Dong 11629a84457aed4c45bc900998b5e11c03023264208James Dong for (q = 0; q < 8; q++) 11729a84457aed4c45bc900998b5e11c03023264208James Dong { 11829a84457aed4c45bc900998b5e11c03023264208James Dong d = SATD_MB(encvid->qpel_cand[q], cur, dmin); 11929a84457aed4c45bc900998b5e11c03023264208James Dong mvcost = MV_COST_S(lambda_motion, mot->x + xq[q], mot->y + yq[q], cmvx, cmvy); 12029a84457aed4c45bc900998b5e11c03023264208James Dong d += mvcost; 12129a84457aed4c45bc900998b5e11c03023264208James Dong if (d < dmin) 12229a84457aed4c45bc900998b5e11c03023264208James Dong { 12329a84457aed4c45bc900998b5e11c03023264208James Dong dmin = d; 12429a84457aed4c45bc900998b5e11c03023264208James Dong qmin = q; 12529a84457aed4c45bc900998b5e11c03023264208James Dong satd_min = d - mvcost; 12629a84457aed4c45bc900998b5e11c03023264208James Dong } 12729a84457aed4c45bc900998b5e11c03023264208James Dong } 12829a84457aed4c45bc900998b5e11c03023264208James Dong 12929a84457aed4c45bc900998b5e11c03023264208James Dong if (qmin != -1) 13029a84457aed4c45bc900998b5e11c03023264208James Dong { 13129a84457aed4c45bc900998b5e11c03023264208James Dong mot->sad = dmin; 13229a84457aed4c45bc900998b5e11c03023264208James Dong mot->x += xq[qmin]; 13329a84457aed4c45bc900998b5e11c03023264208James Dong mot->y += yq[qmin]; 13429a84457aed4c45bc900998b5e11c03023264208James Dong encvid->best_qpel_pos = qmin; 13529a84457aed4c45bc900998b5e11c03023264208James Dong } 13629a84457aed4c45bc900998b5e11c03023264208James Dong 13729a84457aed4c45bc900998b5e11c03023264208James Dong return satd_min; 13829a84457aed4c45bc900998b5e11c03023264208James Dong} 13929a84457aed4c45bc900998b5e11c03023264208James Dong 14029a84457aed4c45bc900998b5e11c03023264208James Dong 14129a84457aed4c45bc900998b5e11c03023264208James Dong 14229a84457aed4c45bc900998b5e11c03023264208James Dong/** This function generates sub-pel prediction around the full-pel candidate. 14329a84457aed4c45bc900998b5e11c03023264208James DongEach sub-pel position array is 20 pixel wide (for word-alignment) and 17 pixel tall. */ 14429a84457aed4c45bc900998b5e11c03023264208James Dong/** The sub-pel position is labeled in spiral manner from the center. */ 14529a84457aed4c45bc900998b5e11c03023264208James Dong 14629a84457aed4c45bc900998b5e11c03023264208James Dongvoid GenerateHalfPelPred(uint8* subpel_pred, uint8 *ncand, int lx) 14729a84457aed4c45bc900998b5e11c03023264208James Dong{ 14829a84457aed4c45bc900998b5e11c03023264208James Dong /* let's do straightforward way first */ 14929a84457aed4c45bc900998b5e11c03023264208James Dong uint8 *ref; 15029a84457aed4c45bc900998b5e11c03023264208James Dong uint8 *dst; 15129a84457aed4c45bc900998b5e11c03023264208James Dong uint8 tmp8; 15229a84457aed4c45bc900998b5e11c03023264208James Dong int32 tmp32; 15329a84457aed4c45bc900998b5e11c03023264208James Dong int16 tmp_horz[18*22], *dst_16, *src_16; 15429a84457aed4c45bc900998b5e11c03023264208James Dong register int a = 0, b = 0, c = 0, d = 0, e = 0, f = 0; // temp register 15529a84457aed4c45bc900998b5e11c03023264208James Dong int msk; 15629a84457aed4c45bc900998b5e11c03023264208James Dong int i, j; 15729a84457aed4c45bc900998b5e11c03023264208James Dong 15829a84457aed4c45bc900998b5e11c03023264208James Dong /* first copy full-pel to the first array */ 15929a84457aed4c45bc900998b5e11c03023264208James Dong /* to be optimized later based on byte-offset load */ 16029a84457aed4c45bc900998b5e11c03023264208James Dong ref = ncand - 3 - lx - (lx << 1); /* move back (-3,-3) */ 16129a84457aed4c45bc900998b5e11c03023264208James Dong dst = subpel_pred; 16229a84457aed4c45bc900998b5e11c03023264208James Dong 16329a84457aed4c45bc900998b5e11c03023264208James Dong dst -= 4; /* offset */ 16429a84457aed4c45bc900998b5e11c03023264208James Dong for (j = 0; j < 22; j++) /* 24x22 */ 16529a84457aed4c45bc900998b5e11c03023264208James Dong { 16629a84457aed4c45bc900998b5e11c03023264208James Dong i = 6; 16729a84457aed4c45bc900998b5e11c03023264208James Dong while (i > 0) 16829a84457aed4c45bc900998b5e11c03023264208James Dong { 16929a84457aed4c45bc900998b5e11c03023264208James Dong tmp32 = *ref++; 17029a84457aed4c45bc900998b5e11c03023264208James Dong tmp8 = *ref++; 17129a84457aed4c45bc900998b5e11c03023264208James Dong tmp32 |= (tmp8 << 8); 17229a84457aed4c45bc900998b5e11c03023264208James Dong tmp8 = *ref++; 17329a84457aed4c45bc900998b5e11c03023264208James Dong tmp32 |= (tmp8 << 16); 17429a84457aed4c45bc900998b5e11c03023264208James Dong tmp8 = *ref++; 17529a84457aed4c45bc900998b5e11c03023264208James Dong tmp32 |= (tmp8 << 24); 17629a84457aed4c45bc900998b5e11c03023264208James Dong *((uint32*)(dst += 4)) = tmp32; 17729a84457aed4c45bc900998b5e11c03023264208James Dong i--; 17829a84457aed4c45bc900998b5e11c03023264208James Dong } 17929a84457aed4c45bc900998b5e11c03023264208James Dong ref += (lx - 24); 18029a84457aed4c45bc900998b5e11c03023264208James Dong } 18129a84457aed4c45bc900998b5e11c03023264208James Dong 18229a84457aed4c45bc900998b5e11c03023264208James Dong /* from the first array, we do horizontal interp */ 18329a84457aed4c45bc900998b5e11c03023264208James Dong ref = subpel_pred + 2; 18429a84457aed4c45bc900998b5e11c03023264208James Dong dst_16 = tmp_horz; /* 17 x 22 */ 18529a84457aed4c45bc900998b5e11c03023264208James Dong 18629a84457aed4c45bc900998b5e11c03023264208James Dong for (j = 4; j > 0; j--) 18729a84457aed4c45bc900998b5e11c03023264208James Dong { 18829a84457aed4c45bc900998b5e11c03023264208James Dong for (i = 16; i > 0; i -= 4) 18929a84457aed4c45bc900998b5e11c03023264208James Dong { 19029a84457aed4c45bc900998b5e11c03023264208James Dong a = ref[-2]; 19129a84457aed4c45bc900998b5e11c03023264208James Dong b = ref[-1]; 19229a84457aed4c45bc900998b5e11c03023264208James Dong c = ref[0]; 19329a84457aed4c45bc900998b5e11c03023264208James Dong d = ref[1]; 19429a84457aed4c45bc900998b5e11c03023264208James Dong e = ref[2]; 19529a84457aed4c45bc900998b5e11c03023264208James Dong f = ref[3]; 19629a84457aed4c45bc900998b5e11c03023264208James Dong *dst_16++ = a + f - 5 * (b + e) + 20 * (c + d); 19729a84457aed4c45bc900998b5e11c03023264208James Dong a = ref[4]; 19829a84457aed4c45bc900998b5e11c03023264208James Dong *dst_16++ = b + a - 5 * (c + f) + 20 * (d + e); 19929a84457aed4c45bc900998b5e11c03023264208James Dong b = ref[5]; 20029a84457aed4c45bc900998b5e11c03023264208James Dong *dst_16++ = c + b - 5 * (d + a) + 20 * (e + f); 20129a84457aed4c45bc900998b5e11c03023264208James Dong c = ref[6]; 20229a84457aed4c45bc900998b5e11c03023264208James Dong *dst_16++ = d + c - 5 * (e + b) + 20 * (f + a); 20329a84457aed4c45bc900998b5e11c03023264208James Dong 20429a84457aed4c45bc900998b5e11c03023264208James Dong ref += 4; 20529a84457aed4c45bc900998b5e11c03023264208James Dong } 20629a84457aed4c45bc900998b5e11c03023264208James Dong /* do the 17th column here */ 20729a84457aed4c45bc900998b5e11c03023264208James Dong d = ref[3]; 20829a84457aed4c45bc900998b5e11c03023264208James Dong *dst_16 = e + d - 5 * (f + c) + 20 * (a + b); 20929a84457aed4c45bc900998b5e11c03023264208James Dong dst_16 += 2; /* stride for tmp_horz is 18 */ 21029a84457aed4c45bc900998b5e11c03023264208James Dong ref += 8; /* stride for ref is 24 */ 21129a84457aed4c45bc900998b5e11c03023264208James Dong if (j == 3) // move 18 lines down 21229a84457aed4c45bc900998b5e11c03023264208James Dong { 21329a84457aed4c45bc900998b5e11c03023264208James Dong dst_16 += 324;//18*18; 21429a84457aed4c45bc900998b5e11c03023264208James Dong ref += 432;//18*24; 21529a84457aed4c45bc900998b5e11c03023264208James Dong } 21629a84457aed4c45bc900998b5e11c03023264208James Dong } 21729a84457aed4c45bc900998b5e11c03023264208James Dong 21829a84457aed4c45bc900998b5e11c03023264208James Dong ref -= 480;//20*24; 21929a84457aed4c45bc900998b5e11c03023264208James Dong dst_16 -= 360;//20*18; 22029a84457aed4c45bc900998b5e11c03023264208James Dong dst = subpel_pred + V0Q_H2Q * SUBPEL_PRED_BLK_SIZE; /* go to the 14th array 17x18*/ 22129a84457aed4c45bc900998b5e11c03023264208James Dong 22229a84457aed4c45bc900998b5e11c03023264208James Dong for (j = 18; j > 0; j--) 22329a84457aed4c45bc900998b5e11c03023264208James Dong { 22429a84457aed4c45bc900998b5e11c03023264208James Dong for (i = 16; i > 0; i -= 4) 22529a84457aed4c45bc900998b5e11c03023264208James Dong { 22629a84457aed4c45bc900998b5e11c03023264208James Dong a = ref[-2]; 22729a84457aed4c45bc900998b5e11c03023264208James Dong b = ref[-1]; 22829a84457aed4c45bc900998b5e11c03023264208James Dong c = ref[0]; 22929a84457aed4c45bc900998b5e11c03023264208James Dong d = ref[1]; 23029a84457aed4c45bc900998b5e11c03023264208James Dong e = ref[2]; 23129a84457aed4c45bc900998b5e11c03023264208James Dong f = ref[3]; 23229a84457aed4c45bc900998b5e11c03023264208James Dong tmp32 = a + f - 5 * (b + e) + 20 * (c + d); 23329a84457aed4c45bc900998b5e11c03023264208James Dong *dst_16++ = tmp32; 23429a84457aed4c45bc900998b5e11c03023264208James Dong tmp32 = (tmp32 + 16) >> 5; 23529a84457aed4c45bc900998b5e11c03023264208James Dong CLIP_RESULT(tmp32) 23629a84457aed4c45bc900998b5e11c03023264208James Dong *dst++ = tmp32; 23729a84457aed4c45bc900998b5e11c03023264208James Dong 23829a84457aed4c45bc900998b5e11c03023264208James Dong a = ref[4]; 23929a84457aed4c45bc900998b5e11c03023264208James Dong tmp32 = b + a - 5 * (c + f) + 20 * (d + e); 24029a84457aed4c45bc900998b5e11c03023264208James Dong *dst_16++ = tmp32; 24129a84457aed4c45bc900998b5e11c03023264208James Dong tmp32 = (tmp32 + 16) >> 5; 24229a84457aed4c45bc900998b5e11c03023264208James Dong CLIP_RESULT(tmp32) 24329a84457aed4c45bc900998b5e11c03023264208James Dong *dst++ = tmp32; 24429a84457aed4c45bc900998b5e11c03023264208James Dong 24529a84457aed4c45bc900998b5e11c03023264208James Dong b = ref[5]; 24629a84457aed4c45bc900998b5e11c03023264208James Dong tmp32 = c + b - 5 * (d + a) + 20 * (e + f); 24729a84457aed4c45bc900998b5e11c03023264208James Dong *dst_16++ = tmp32; 24829a84457aed4c45bc900998b5e11c03023264208James Dong tmp32 = (tmp32 + 16) >> 5; 24929a84457aed4c45bc900998b5e11c03023264208James Dong CLIP_RESULT(tmp32) 25029a84457aed4c45bc900998b5e11c03023264208James Dong *dst++ = tmp32; 25129a84457aed4c45bc900998b5e11c03023264208James Dong 25229a84457aed4c45bc900998b5e11c03023264208James Dong c = ref[6]; 25329a84457aed4c45bc900998b5e11c03023264208James Dong tmp32 = d + c - 5 * (e + b) + 20 * (f + a); 25429a84457aed4c45bc900998b5e11c03023264208James Dong *dst_16++ = tmp32; 25529a84457aed4c45bc900998b5e11c03023264208James Dong tmp32 = (tmp32 + 16) >> 5; 25629a84457aed4c45bc900998b5e11c03023264208James Dong CLIP_RESULT(tmp32) 25729a84457aed4c45bc900998b5e11c03023264208James Dong *dst++ = tmp32; 25829a84457aed4c45bc900998b5e11c03023264208James Dong 25929a84457aed4c45bc900998b5e11c03023264208James Dong ref += 4; 26029a84457aed4c45bc900998b5e11c03023264208James Dong } 26129a84457aed4c45bc900998b5e11c03023264208James Dong /* do the 17th column here */ 26229a84457aed4c45bc900998b5e11c03023264208James Dong d = ref[3]; 26329a84457aed4c45bc900998b5e11c03023264208James Dong tmp32 = e + d - 5 * (f + c) + 20 * (a + b); 26429a84457aed4c45bc900998b5e11c03023264208James Dong *dst_16 = tmp32; 26529a84457aed4c45bc900998b5e11c03023264208James Dong tmp32 = (tmp32 + 16) >> 5; 26629a84457aed4c45bc900998b5e11c03023264208James Dong CLIP_RESULT(tmp32) 26729a84457aed4c45bc900998b5e11c03023264208James Dong *dst = tmp32; 26829a84457aed4c45bc900998b5e11c03023264208James Dong 26929a84457aed4c45bc900998b5e11c03023264208James Dong dst += 8; /* stride for dst is 24 */ 27029a84457aed4c45bc900998b5e11c03023264208James Dong dst_16 += 2; /* stride for tmp_horz is 18 */ 27129a84457aed4c45bc900998b5e11c03023264208James Dong ref += 8; /* stride for ref is 24 */ 27229a84457aed4c45bc900998b5e11c03023264208James Dong } 27329a84457aed4c45bc900998b5e11c03023264208James Dong 27429a84457aed4c45bc900998b5e11c03023264208James Dong 27529a84457aed4c45bc900998b5e11c03023264208James Dong /* Do middle point filtering*/ 27629a84457aed4c45bc900998b5e11c03023264208James Dong src_16 = tmp_horz; /* 17 x 22 */ 27729a84457aed4c45bc900998b5e11c03023264208James Dong dst = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE; /* 12th array 17x17*/ 27829a84457aed4c45bc900998b5e11c03023264208James Dong dst -= 24; // offset 27929a84457aed4c45bc900998b5e11c03023264208James Dong for (i = 0; i < 17; i++) 28029a84457aed4c45bc900998b5e11c03023264208James Dong { 28129a84457aed4c45bc900998b5e11c03023264208James Dong for (j = 16; j > 0; j -= 4) 28229a84457aed4c45bc900998b5e11c03023264208James Dong { 28329a84457aed4c45bc900998b5e11c03023264208James Dong a = *src_16; 28429a84457aed4c45bc900998b5e11c03023264208James Dong b = *(src_16 += 18); 28529a84457aed4c45bc900998b5e11c03023264208James Dong c = *(src_16 += 18); 28629a84457aed4c45bc900998b5e11c03023264208James Dong d = *(src_16 += 18); 28729a84457aed4c45bc900998b5e11c03023264208James Dong e = *(src_16 += 18); 28829a84457aed4c45bc900998b5e11c03023264208James Dong f = *(src_16 += 18); 28929a84457aed4c45bc900998b5e11c03023264208James Dong 29029a84457aed4c45bc900998b5e11c03023264208James Dong tmp32 = a + f - 5 * (b + e) + 20 * (c + d); 29129a84457aed4c45bc900998b5e11c03023264208James Dong tmp32 = (tmp32 + 512) >> 10; 29229a84457aed4c45bc900998b5e11c03023264208James Dong CLIP_RESULT(tmp32) 29329a84457aed4c45bc900998b5e11c03023264208James Dong *(dst += 24) = tmp32; 29429a84457aed4c45bc900998b5e11c03023264208James Dong 29529a84457aed4c45bc900998b5e11c03023264208James Dong a = *(src_16 += 18); 29629a84457aed4c45bc900998b5e11c03023264208James Dong tmp32 = b + a - 5 * (c + f) + 20 * (d + e); 29729a84457aed4c45bc900998b5e11c03023264208James Dong tmp32 = (tmp32 + 512) >> 10; 29829a84457aed4c45bc900998b5e11c03023264208James Dong CLIP_RESULT(tmp32) 29929a84457aed4c45bc900998b5e11c03023264208James Dong *(dst += 24) = tmp32; 30029a84457aed4c45bc900998b5e11c03023264208James Dong 30129a84457aed4c45bc900998b5e11c03023264208James Dong b = *(src_16 += 18); 30229a84457aed4c45bc900998b5e11c03023264208James Dong tmp32 = c + b - 5 * (d + a) + 20 * (e + f); 30329a84457aed4c45bc900998b5e11c03023264208James Dong tmp32 = (tmp32 + 512) >> 10; 30429a84457aed4c45bc900998b5e11c03023264208James Dong CLIP_RESULT(tmp32) 30529a84457aed4c45bc900998b5e11c03023264208James Dong *(dst += 24) = tmp32; 30629a84457aed4c45bc900998b5e11c03023264208James Dong 30729a84457aed4c45bc900998b5e11c03023264208James Dong c = *(src_16 += 18); 30829a84457aed4c45bc900998b5e11c03023264208James Dong tmp32 = d + c - 5 * (e + b) + 20 * (f + a); 30929a84457aed4c45bc900998b5e11c03023264208James Dong tmp32 = (tmp32 + 512) >> 10; 31029a84457aed4c45bc900998b5e11c03023264208James Dong CLIP_RESULT(tmp32) 31129a84457aed4c45bc900998b5e11c03023264208James Dong *(dst += 24) = tmp32; 31229a84457aed4c45bc900998b5e11c03023264208James Dong 31329a84457aed4c45bc900998b5e11c03023264208James Dong src_16 -= (18 << 2); 31429a84457aed4c45bc900998b5e11c03023264208James Dong } 31529a84457aed4c45bc900998b5e11c03023264208James Dong 31629a84457aed4c45bc900998b5e11c03023264208James Dong d = src_16[90]; // 18*5 31729a84457aed4c45bc900998b5e11c03023264208James Dong tmp32 = e + d - 5 * (f + c) + 20 * (a + b); 31829a84457aed4c45bc900998b5e11c03023264208James Dong tmp32 = (tmp32 + 512) >> 10; 31929a84457aed4c45bc900998b5e11c03023264208James Dong CLIP_RESULT(tmp32) 32029a84457aed4c45bc900998b5e11c03023264208James Dong dst[24] = tmp32; 32129a84457aed4c45bc900998b5e11c03023264208James Dong 32229a84457aed4c45bc900998b5e11c03023264208James Dong src_16 -= ((18 << 4) - 1); 32329a84457aed4c45bc900998b5e11c03023264208James Dong dst -= ((24 << 4) - 1); 32429a84457aed4c45bc900998b5e11c03023264208James Dong } 32529a84457aed4c45bc900998b5e11c03023264208James Dong 32629a84457aed4c45bc900998b5e11c03023264208James Dong /* do vertical interpolation */ 32729a84457aed4c45bc900998b5e11c03023264208James Dong ref = subpel_pred + 2; 32829a84457aed4c45bc900998b5e11c03023264208James Dong dst = subpel_pred + V2Q_H0Q * SUBPEL_PRED_BLK_SIZE; /* 10th array 18x17 */ 32929a84457aed4c45bc900998b5e11c03023264208James Dong dst -= 24; // offset 33029a84457aed4c45bc900998b5e11c03023264208James Dong 33129a84457aed4c45bc900998b5e11c03023264208James Dong for (i = 2; i > 0; i--) 33229a84457aed4c45bc900998b5e11c03023264208James Dong { 33329a84457aed4c45bc900998b5e11c03023264208James Dong for (j = 16; j > 0; j -= 4) 33429a84457aed4c45bc900998b5e11c03023264208James Dong { 33529a84457aed4c45bc900998b5e11c03023264208James Dong a = *ref; 33629a84457aed4c45bc900998b5e11c03023264208James Dong b = *(ref += 24); 33729a84457aed4c45bc900998b5e11c03023264208James Dong c = *(ref += 24); 33829a84457aed4c45bc900998b5e11c03023264208James Dong d = *(ref += 24); 33929a84457aed4c45bc900998b5e11c03023264208James Dong e = *(ref += 24); 34029a84457aed4c45bc900998b5e11c03023264208James Dong f = *(ref += 24); 34129a84457aed4c45bc900998b5e11c03023264208James Dong 34229a84457aed4c45bc900998b5e11c03023264208James Dong tmp32 = a + f - 5 * (b + e) + 20 * (c + d); 34329a84457aed4c45bc900998b5e11c03023264208James Dong tmp32 = (tmp32 + 16) >> 5; 34429a84457aed4c45bc900998b5e11c03023264208James Dong CLIP_RESULT(tmp32) 34529a84457aed4c45bc900998b5e11c03023264208James Dong *(dst += 24) = tmp32; // 10th 34629a84457aed4c45bc900998b5e11c03023264208James Dong 34729a84457aed4c45bc900998b5e11c03023264208James Dong a = *(ref += 24); 34829a84457aed4c45bc900998b5e11c03023264208James Dong tmp32 = b + a - 5 * (c + f) + 20 * (d + e); 34929a84457aed4c45bc900998b5e11c03023264208James Dong tmp32 = (tmp32 + 16) >> 5; 35029a84457aed4c45bc900998b5e11c03023264208James Dong CLIP_RESULT(tmp32) 35129a84457aed4c45bc900998b5e11c03023264208James Dong *(dst += 24) = tmp32; // 10th 35229a84457aed4c45bc900998b5e11c03023264208James Dong 35329a84457aed4c45bc900998b5e11c03023264208James Dong b = *(ref += 24); 35429a84457aed4c45bc900998b5e11c03023264208James Dong tmp32 = c + b - 5 * (d + a) + 20 * (e + f); 35529a84457aed4c45bc900998b5e11c03023264208James Dong tmp32 = (tmp32 + 16) >> 5; 35629a84457aed4c45bc900998b5e11c03023264208James Dong CLIP_RESULT(tmp32) 35729a84457aed4c45bc900998b5e11c03023264208James Dong *(dst += 24) = tmp32; // 10th 35829a84457aed4c45bc900998b5e11c03023264208James Dong 35929a84457aed4c45bc900998b5e11c03023264208James Dong c = *(ref += 24); 36029a84457aed4c45bc900998b5e11c03023264208James Dong tmp32 = d + c - 5 * (e + b) + 20 * (f + a); 36129a84457aed4c45bc900998b5e11c03023264208James Dong tmp32 = (tmp32 + 16) >> 5; 36229a84457aed4c45bc900998b5e11c03023264208James Dong CLIP_RESULT(tmp32) 36329a84457aed4c45bc900998b5e11c03023264208James Dong *(dst += 24) = tmp32; // 10th 36429a84457aed4c45bc900998b5e11c03023264208James Dong 36529a84457aed4c45bc900998b5e11c03023264208James Dong ref -= (24 << 2); 36629a84457aed4c45bc900998b5e11c03023264208James Dong } 36729a84457aed4c45bc900998b5e11c03023264208James Dong 36829a84457aed4c45bc900998b5e11c03023264208James Dong d = ref[120]; // 24*5 36929a84457aed4c45bc900998b5e11c03023264208James Dong tmp32 = e + d - 5 * (f + c) + 20 * (a + b); 37029a84457aed4c45bc900998b5e11c03023264208James Dong tmp32 = (tmp32 + 16) >> 5; 37129a84457aed4c45bc900998b5e11c03023264208James Dong CLIP_RESULT(tmp32) 37229a84457aed4c45bc900998b5e11c03023264208James Dong dst[24] = tmp32; // 10th 37329a84457aed4c45bc900998b5e11c03023264208James Dong 37429a84457aed4c45bc900998b5e11c03023264208James Dong dst -= ((24 << 4) - 1); 37529a84457aed4c45bc900998b5e11c03023264208James Dong ref -= ((24 << 4) - 1); 37629a84457aed4c45bc900998b5e11c03023264208James Dong } 37729a84457aed4c45bc900998b5e11c03023264208James Dong 37829a84457aed4c45bc900998b5e11c03023264208James Dong // note that using SIMD here doesn't help much, the cycle almost stays the same 37929a84457aed4c45bc900998b5e11c03023264208James Dong // one can just use the above code and change the for(i=2 to for(i=18 38029a84457aed4c45bc900998b5e11c03023264208James Dong for (i = 16; i > 0; i -= 4) 38129a84457aed4c45bc900998b5e11c03023264208James Dong { 38229a84457aed4c45bc900998b5e11c03023264208James Dong msk = 0; 38329a84457aed4c45bc900998b5e11c03023264208James Dong for (j = 17; j > 0; j--) 38429a84457aed4c45bc900998b5e11c03023264208James Dong { 38529a84457aed4c45bc900998b5e11c03023264208James Dong a = *((uint32*)ref); /* load 4 bytes */ 38629a84457aed4c45bc900998b5e11c03023264208James Dong b = (a >> 8) & 0xFF00FF; /* second and fourth byte */ 38729a84457aed4c45bc900998b5e11c03023264208James Dong a &= 0xFF00FF; 38829a84457aed4c45bc900998b5e11c03023264208James Dong 38929a84457aed4c45bc900998b5e11c03023264208James Dong c = *((uint32*)(ref + 120)); 39029a84457aed4c45bc900998b5e11c03023264208James Dong d = (c >> 8) & 0xFF00FF; 39129a84457aed4c45bc900998b5e11c03023264208James Dong c &= 0xFF00FF; 39229a84457aed4c45bc900998b5e11c03023264208James Dong 39329a84457aed4c45bc900998b5e11c03023264208James Dong a += c; 39429a84457aed4c45bc900998b5e11c03023264208James Dong b += d; 39529a84457aed4c45bc900998b5e11c03023264208James Dong 39629a84457aed4c45bc900998b5e11c03023264208James Dong e = *((uint32*)(ref + 72)); /* e, f */ 39729a84457aed4c45bc900998b5e11c03023264208James Dong f = (e >> 8) & 0xFF00FF; 39829a84457aed4c45bc900998b5e11c03023264208James Dong e &= 0xFF00FF; 39929a84457aed4c45bc900998b5e11c03023264208James Dong 40029a84457aed4c45bc900998b5e11c03023264208James Dong c = *((uint32*)(ref + 48)); /* c, d */ 40129a84457aed4c45bc900998b5e11c03023264208James Dong d = (c >> 8) & 0xFF00FF; 40229a84457aed4c45bc900998b5e11c03023264208James Dong c &= 0xFF00FF; 40329a84457aed4c45bc900998b5e11c03023264208James Dong 40429a84457aed4c45bc900998b5e11c03023264208James Dong c += e; 40529a84457aed4c45bc900998b5e11c03023264208James Dong d += f; 40629a84457aed4c45bc900998b5e11c03023264208James Dong 40729a84457aed4c45bc900998b5e11c03023264208James Dong a += 20 * c; 40829a84457aed4c45bc900998b5e11c03023264208James Dong b += 20 * d; 40929a84457aed4c45bc900998b5e11c03023264208James Dong a += 0x100010; 41029a84457aed4c45bc900998b5e11c03023264208James Dong b += 0x100010; 41129a84457aed4c45bc900998b5e11c03023264208James Dong 41229a84457aed4c45bc900998b5e11c03023264208James Dong e = *((uint32*)(ref += 24)); /* e, f */ 41329a84457aed4c45bc900998b5e11c03023264208James Dong f = (e >> 8) & 0xFF00FF; 41429a84457aed4c45bc900998b5e11c03023264208James Dong e &= 0xFF00FF; 41529a84457aed4c45bc900998b5e11c03023264208James Dong 41629a84457aed4c45bc900998b5e11c03023264208James Dong c = *((uint32*)(ref + 72)); /* c, d */ 41729a84457aed4c45bc900998b5e11c03023264208James Dong d = (c >> 8) & 0xFF00FF; 41829a84457aed4c45bc900998b5e11c03023264208James Dong c &= 0xFF00FF; 41929a84457aed4c45bc900998b5e11c03023264208James Dong 42029a84457aed4c45bc900998b5e11c03023264208James Dong c += e; 42129a84457aed4c45bc900998b5e11c03023264208James Dong d += f; 42229a84457aed4c45bc900998b5e11c03023264208James Dong 42329a84457aed4c45bc900998b5e11c03023264208James Dong a -= 5 * c; 42429a84457aed4c45bc900998b5e11c03023264208James Dong b -= 5 * d; 42529a84457aed4c45bc900998b5e11c03023264208James Dong 42629a84457aed4c45bc900998b5e11c03023264208James Dong c = a << 16; 42729a84457aed4c45bc900998b5e11c03023264208James Dong d = b << 16; 42829a84457aed4c45bc900998b5e11c03023264208James Dong CLIP_UPPER16(a) 42929a84457aed4c45bc900998b5e11c03023264208James Dong CLIP_UPPER16(c) 43029a84457aed4c45bc900998b5e11c03023264208James Dong CLIP_UPPER16(b) 43129a84457aed4c45bc900998b5e11c03023264208James Dong CLIP_UPPER16(d) 43229a84457aed4c45bc900998b5e11c03023264208James Dong 43329a84457aed4c45bc900998b5e11c03023264208James Dong a |= (c >> 16); 43429a84457aed4c45bc900998b5e11c03023264208James Dong b |= (d >> 16); 43529a84457aed4c45bc900998b5e11c03023264208James Dong // a>>=5; 43629a84457aed4c45bc900998b5e11c03023264208James Dong // b>>=5; 43729a84457aed4c45bc900998b5e11c03023264208James Dong /* clip */ 43829a84457aed4c45bc900998b5e11c03023264208James Dong // msk |= b; msk|=a; 43929a84457aed4c45bc900998b5e11c03023264208James Dong // a &= 0xFF00FF; 44029a84457aed4c45bc900998b5e11c03023264208James Dong // b &= 0xFF00FF; 44129a84457aed4c45bc900998b5e11c03023264208James Dong a |= (b << 8); /* pack it back */ 44229a84457aed4c45bc900998b5e11c03023264208James Dong 44329a84457aed4c45bc900998b5e11c03023264208James Dong *((uint16*)(dst += 24)) = a & 0xFFFF; //dst is not word-aligned. 44429a84457aed4c45bc900998b5e11c03023264208James Dong *((uint16*)(dst + 2)) = a >> 16; 44529a84457aed4c45bc900998b5e11c03023264208James Dong 44629a84457aed4c45bc900998b5e11c03023264208James Dong } 44729a84457aed4c45bc900998b5e11c03023264208James Dong dst -= 404; // 24*17-4 44829a84457aed4c45bc900998b5e11c03023264208James Dong ref -= 404; 44929a84457aed4c45bc900998b5e11c03023264208James Dong /* if(msk & 0xFF00FF00) // need clipping 45029a84457aed4c45bc900998b5e11c03023264208James Dong { 45129a84457aed4c45bc900998b5e11c03023264208James Dong VertInterpWClip(dst,ref); // re-do 4 column with clip 45229a84457aed4c45bc900998b5e11c03023264208James Dong }*/ 45329a84457aed4c45bc900998b5e11c03023264208James Dong } 45429a84457aed4c45bc900998b5e11c03023264208James Dong 45529a84457aed4c45bc900998b5e11c03023264208James Dong return ; 45629a84457aed4c45bc900998b5e11c03023264208James Dong} 45729a84457aed4c45bc900998b5e11c03023264208James Dong 45829a84457aed4c45bc900998b5e11c03023264208James Dongvoid VertInterpWClip(uint8 *dst, uint8 *ref) 45929a84457aed4c45bc900998b5e11c03023264208James Dong{ 46029a84457aed4c45bc900998b5e11c03023264208James Dong int i, j; 46129a84457aed4c45bc900998b5e11c03023264208James Dong int a, b, c, d, e, f; 46229a84457aed4c45bc900998b5e11c03023264208James Dong int32 tmp32; 46329a84457aed4c45bc900998b5e11c03023264208James Dong 46429a84457aed4c45bc900998b5e11c03023264208James Dong dst -= 4; 46529a84457aed4c45bc900998b5e11c03023264208James Dong ref -= 4; 46629a84457aed4c45bc900998b5e11c03023264208James Dong 46729a84457aed4c45bc900998b5e11c03023264208James Dong for (i = 4; i > 0; i--) 46829a84457aed4c45bc900998b5e11c03023264208James Dong { 46929a84457aed4c45bc900998b5e11c03023264208James Dong for (j = 16; j > 0; j -= 4) 47029a84457aed4c45bc900998b5e11c03023264208James Dong { 47129a84457aed4c45bc900998b5e11c03023264208James Dong a = *ref; 47229a84457aed4c45bc900998b5e11c03023264208James Dong b = *(ref += 24); 47329a84457aed4c45bc900998b5e11c03023264208James Dong c = *(ref += 24); 47429a84457aed4c45bc900998b5e11c03023264208James Dong d = *(ref += 24); 47529a84457aed4c45bc900998b5e11c03023264208James Dong e = *(ref += 24); 47629a84457aed4c45bc900998b5e11c03023264208James Dong f = *(ref += 24); 47729a84457aed4c45bc900998b5e11c03023264208James Dong 47829a84457aed4c45bc900998b5e11c03023264208James Dong tmp32 = a + f - 5 * (b + e) + 20 * (c + d); 47929a84457aed4c45bc900998b5e11c03023264208James Dong tmp32 = (tmp32 + 16) >> 5; 48029a84457aed4c45bc900998b5e11c03023264208James Dong CLIP_RESULT(tmp32) 48129a84457aed4c45bc900998b5e11c03023264208James Dong *(dst += 24) = tmp32; // 10th 48229a84457aed4c45bc900998b5e11c03023264208James Dong 48329a84457aed4c45bc900998b5e11c03023264208James Dong a = *(ref += 24); 48429a84457aed4c45bc900998b5e11c03023264208James Dong tmp32 = b + a - 5 * (c + f) + 20 * (d + e); 48529a84457aed4c45bc900998b5e11c03023264208James Dong tmp32 = (tmp32 + 16) >> 5; 48629a84457aed4c45bc900998b5e11c03023264208James Dong CLIP_RESULT(tmp32) 48729a84457aed4c45bc900998b5e11c03023264208James Dong *(dst += 24) = tmp32; // 10th 48829a84457aed4c45bc900998b5e11c03023264208James Dong 48929a84457aed4c45bc900998b5e11c03023264208James Dong b = *(ref += 24); 49029a84457aed4c45bc900998b5e11c03023264208James Dong tmp32 = c + b - 5 * (d + a) + 20 * (e + f); 49129a84457aed4c45bc900998b5e11c03023264208James Dong tmp32 = (tmp32 + 16) >> 5; 49229a84457aed4c45bc900998b5e11c03023264208James Dong CLIP_RESULT(tmp32) 49329a84457aed4c45bc900998b5e11c03023264208James Dong *(dst += 24) = tmp32; // 10th 49429a84457aed4c45bc900998b5e11c03023264208James Dong 49529a84457aed4c45bc900998b5e11c03023264208James Dong c = *(ref += 24); 49629a84457aed4c45bc900998b5e11c03023264208James Dong tmp32 = d + c - 5 * (e + b) + 20 * (f + a); 49729a84457aed4c45bc900998b5e11c03023264208James Dong tmp32 = (tmp32 + 16) >> 5; 49829a84457aed4c45bc900998b5e11c03023264208James Dong CLIP_RESULT(tmp32) 49929a84457aed4c45bc900998b5e11c03023264208James Dong *(dst += 24) = tmp32; // 10th 50029a84457aed4c45bc900998b5e11c03023264208James Dong 50129a84457aed4c45bc900998b5e11c03023264208James Dong ref -= (24 << 2); 50229a84457aed4c45bc900998b5e11c03023264208James Dong } 50329a84457aed4c45bc900998b5e11c03023264208James Dong 50429a84457aed4c45bc900998b5e11c03023264208James Dong d = ref[120]; // 24*5 50529a84457aed4c45bc900998b5e11c03023264208James Dong tmp32 = e + d - 5 * (f + c) + 20 * (a + b); 50629a84457aed4c45bc900998b5e11c03023264208James Dong tmp32 = (tmp32 + 16) >> 5; 50729a84457aed4c45bc900998b5e11c03023264208James Dong CLIP_RESULT(tmp32) 50829a84457aed4c45bc900998b5e11c03023264208James Dong dst[24] = tmp32; // 10th 50929a84457aed4c45bc900998b5e11c03023264208James Dong 51029a84457aed4c45bc900998b5e11c03023264208James Dong dst -= ((24 << 4) - 1); 51129a84457aed4c45bc900998b5e11c03023264208James Dong ref -= ((24 << 4) - 1); 51229a84457aed4c45bc900998b5e11c03023264208James Dong } 51329a84457aed4c45bc900998b5e11c03023264208James Dong 51429a84457aed4c45bc900998b5e11c03023264208James Dong return ; 51529a84457aed4c45bc900998b5e11c03023264208James Dong} 51629a84457aed4c45bc900998b5e11c03023264208James Dong 51729a84457aed4c45bc900998b5e11c03023264208James Dong 51829a84457aed4c45bc900998b5e11c03023264208James Dongvoid GenerateQuartPelPred(uint8 **bilin_base, uint8 *qpel_cand, int hpel_pos) 51929a84457aed4c45bc900998b5e11c03023264208James Dong{ 52029a84457aed4c45bc900998b5e11c03023264208James Dong // for even value of hpel_pos, start with pattern 1, otherwise, start with pattern 2 52129a84457aed4c45bc900998b5e11c03023264208James Dong int i, j; 52229a84457aed4c45bc900998b5e11c03023264208James Dong 52329a84457aed4c45bc900998b5e11c03023264208James Dong uint8 *c1 = qpel_cand; 52429a84457aed4c45bc900998b5e11c03023264208James Dong uint8 *tl = bilin_base[0]; 52529a84457aed4c45bc900998b5e11c03023264208James Dong uint8 *tr = bilin_base[1]; 52629a84457aed4c45bc900998b5e11c03023264208James Dong uint8 *bl = bilin_base[2]; 52729a84457aed4c45bc900998b5e11c03023264208James Dong uint8 *br = bilin_base[3]; 52829a84457aed4c45bc900998b5e11c03023264208James Dong int a, b, c, d; 52929a84457aed4c45bc900998b5e11c03023264208James Dong int offset = 1 - (384 * 7); 53029a84457aed4c45bc900998b5e11c03023264208James Dong 53129a84457aed4c45bc900998b5e11c03023264208James Dong if (!(hpel_pos&1)) // diamond pattern 53229a84457aed4c45bc900998b5e11c03023264208James Dong { 53329a84457aed4c45bc900998b5e11c03023264208James Dong j = 16; 53429a84457aed4c45bc900998b5e11c03023264208James Dong while (j--) 53529a84457aed4c45bc900998b5e11c03023264208James Dong { 53629a84457aed4c45bc900998b5e11c03023264208James Dong i = 16; 53729a84457aed4c45bc900998b5e11c03023264208James Dong while (i--) 53829a84457aed4c45bc900998b5e11c03023264208James Dong { 53929a84457aed4c45bc900998b5e11c03023264208James Dong d = tr[24]; 54029a84457aed4c45bc900998b5e11c03023264208James Dong a = *tr++; 54129a84457aed4c45bc900998b5e11c03023264208James Dong b = bl[1]; 54229a84457aed4c45bc900998b5e11c03023264208James Dong c = *br++; 54329a84457aed4c45bc900998b5e11c03023264208James Dong 54429a84457aed4c45bc900998b5e11c03023264208James Dong *c1 = (c + a + 1) >> 1; 54529a84457aed4c45bc900998b5e11c03023264208James Dong *(c1 += 384) = (b + a + 1) >> 1; /* c2 */ 54629a84457aed4c45bc900998b5e11c03023264208James Dong *(c1 += 384) = (b + c + 1) >> 1; /* c3 */ 54729a84457aed4c45bc900998b5e11c03023264208James Dong *(c1 += 384) = (b + d + 1) >> 1; /* c4 */ 54829a84457aed4c45bc900998b5e11c03023264208James Dong 54929a84457aed4c45bc900998b5e11c03023264208James Dong b = *bl++; 55029a84457aed4c45bc900998b5e11c03023264208James Dong 55129a84457aed4c45bc900998b5e11c03023264208James Dong *(c1 += 384) = (c + d + 1) >> 1; /* c5 */ 55229a84457aed4c45bc900998b5e11c03023264208James Dong *(c1 += 384) = (b + d + 1) >> 1; /* c6 */ 55329a84457aed4c45bc900998b5e11c03023264208James Dong *(c1 += 384) = (b + c + 1) >> 1; /* c7 */ 55429a84457aed4c45bc900998b5e11c03023264208James Dong *(c1 += 384) = (b + a + 1) >> 1; /* c8 */ 55529a84457aed4c45bc900998b5e11c03023264208James Dong 55629a84457aed4c45bc900998b5e11c03023264208James Dong c1 += offset; 55729a84457aed4c45bc900998b5e11c03023264208James Dong } 55829a84457aed4c45bc900998b5e11c03023264208James Dong // advance to the next line, pitch is 24 55929a84457aed4c45bc900998b5e11c03023264208James Dong tl += 8; 56029a84457aed4c45bc900998b5e11c03023264208James Dong tr += 8; 56129a84457aed4c45bc900998b5e11c03023264208James Dong bl += 8; 56229a84457aed4c45bc900998b5e11c03023264208James Dong br += 8; 56329a84457aed4c45bc900998b5e11c03023264208James Dong c1 += 8; 56429a84457aed4c45bc900998b5e11c03023264208James Dong } 56529a84457aed4c45bc900998b5e11c03023264208James Dong } 56629a84457aed4c45bc900998b5e11c03023264208James Dong else // star pattern 56729a84457aed4c45bc900998b5e11c03023264208James Dong { 56829a84457aed4c45bc900998b5e11c03023264208James Dong j = 16; 56929a84457aed4c45bc900998b5e11c03023264208James Dong while (j--) 57029a84457aed4c45bc900998b5e11c03023264208James Dong { 57129a84457aed4c45bc900998b5e11c03023264208James Dong i = 16; 57229a84457aed4c45bc900998b5e11c03023264208James Dong while (i--) 57329a84457aed4c45bc900998b5e11c03023264208James Dong { 57429a84457aed4c45bc900998b5e11c03023264208James Dong a = *br++; 57529a84457aed4c45bc900998b5e11c03023264208James Dong b = *tr++; 57629a84457aed4c45bc900998b5e11c03023264208James Dong c = tl[1]; 57729a84457aed4c45bc900998b5e11c03023264208James Dong *c1 = (a + b + 1) >> 1; 57829a84457aed4c45bc900998b5e11c03023264208James Dong b = bl[1]; 57929a84457aed4c45bc900998b5e11c03023264208James Dong *(c1 += 384) = (a + c + 1) >> 1; /* c2 */ 58029a84457aed4c45bc900998b5e11c03023264208James Dong c = tl[25]; 58129a84457aed4c45bc900998b5e11c03023264208James Dong *(c1 += 384) = (a + b + 1) >> 1; /* c3 */ 58229a84457aed4c45bc900998b5e11c03023264208James Dong b = tr[23]; 58329a84457aed4c45bc900998b5e11c03023264208James Dong *(c1 += 384) = (a + c + 1) >> 1; /* c4 */ 58429a84457aed4c45bc900998b5e11c03023264208James Dong c = tl[24]; 58529a84457aed4c45bc900998b5e11c03023264208James Dong *(c1 += 384) = (a + b + 1) >> 1; /* c5 */ 58629a84457aed4c45bc900998b5e11c03023264208James Dong b = *bl++; 58729a84457aed4c45bc900998b5e11c03023264208James Dong *(c1 += 384) = (a + c + 1) >> 1; /* c6 */ 58829a84457aed4c45bc900998b5e11c03023264208James Dong c = *tl++; 58929a84457aed4c45bc900998b5e11c03023264208James Dong *(c1 += 384) = (a + b + 1) >> 1; /* c7 */ 59029a84457aed4c45bc900998b5e11c03023264208James Dong *(c1 += 384) = (a + c + 1) >> 1; /* c8 */ 59129a84457aed4c45bc900998b5e11c03023264208James Dong 59229a84457aed4c45bc900998b5e11c03023264208James Dong c1 += offset; 59329a84457aed4c45bc900998b5e11c03023264208James Dong } 59429a84457aed4c45bc900998b5e11c03023264208James Dong // advance to the next line, pitch is 24 59529a84457aed4c45bc900998b5e11c03023264208James Dong tl += 8; 59629a84457aed4c45bc900998b5e11c03023264208James Dong tr += 8; 59729a84457aed4c45bc900998b5e11c03023264208James Dong bl += 8; 59829a84457aed4c45bc900998b5e11c03023264208James Dong br += 8; 59929a84457aed4c45bc900998b5e11c03023264208James Dong c1 += 8; 60029a84457aed4c45bc900998b5e11c03023264208James Dong } 60129a84457aed4c45bc900998b5e11c03023264208James Dong } 60229a84457aed4c45bc900998b5e11c03023264208James Dong 60329a84457aed4c45bc900998b5e11c03023264208James Dong return ; 60429a84457aed4c45bc900998b5e11c03023264208James Dong} 60529a84457aed4c45bc900998b5e11c03023264208James Dong 60629a84457aed4c45bc900998b5e11c03023264208James Dong 60729a84457aed4c45bc900998b5e11c03023264208James Dong/* assuming cand always has a pitch of 24 */ 60829a84457aed4c45bc900998b5e11c03023264208James Dongint SATD_MB(uint8 *cand, uint8 *cur, int dmin) 60929a84457aed4c45bc900998b5e11c03023264208James Dong{ 61029a84457aed4c45bc900998b5e11c03023264208James Dong int cost; 61129a84457aed4c45bc900998b5e11c03023264208James Dong 61229a84457aed4c45bc900998b5e11c03023264208James Dong 61329a84457aed4c45bc900998b5e11c03023264208James Dong dmin = (dmin << 16) | 24; 61429a84457aed4c45bc900998b5e11c03023264208James Dong cost = AVCSAD_Macroblock_C(cand, cur, dmin, NULL); 61529a84457aed4c45bc900998b5e11c03023264208James Dong 61629a84457aed4c45bc900998b5e11c03023264208James Dong return cost; 61729a84457aed4c45bc900998b5e11c03023264208James Dong} 61829a84457aed4c45bc900998b5e11c03023264208James Dong 61929a84457aed4c45bc900998b5e11c03023264208James Dong 62029a84457aed4c45bc900998b5e11c03023264208James Dong 62129a84457aed4c45bc900998b5e11c03023264208James Dong 62229a84457aed4c45bc900998b5e11c03023264208James Dong 623