14a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber/* ------------------------------------------------------------------ 24a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber * Copyright (C) 1998-2009 PacketVideo 34a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber * 44a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber * Licensed under the Apache License, Version 2.0 (the "License"); 54a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber * you may not use this file except in compliance with the License. 64a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber * You may obtain a copy of the License at 74a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber * 84a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber * http://www.apache.org/licenses/LICENSE-2.0 94a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber * 104a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber * Unless required by applicable law or agreed to in writing, software 114a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber * distributed under the License is distributed on an "AS IS" BASIS, 124a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 134a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber * express or implied. 144a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber * See the License for the specific language governing permissions 154a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber * and limitations under the License. 164a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber * ------------------------------------------------------------------- 174a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber */ 184a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber#include "avcdec_lib.h" 194a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 204a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 214a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber#define CLIP_RESULT(x) if((uint)x > 0xFF){ \ 224a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber x = 0xFF & (~(x>>31));} 234a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 244a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber/* (blkwidth << 2) + (dy << 1) + dx */ 254a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huberstatic void (*const ChromaMC_SIMD[8])(uint8 *, int , int , int , uint8 *, int, int , int) = 264a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber{ 274a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber &ChromaFullMC_SIMD, 284a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber &ChromaHorizontalMC_SIMD, 294a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber &ChromaVerticalMC_SIMD, 304a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber &ChromaDiagonalMC_SIMD, 314a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber &ChromaFullMC_SIMD, 324a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber &ChromaHorizontalMC2_SIMD, 334a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber &ChromaVerticalMC2_SIMD, 344a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber &ChromaDiagonalMC2_SIMD 354a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber}; 364a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber/* Perform motion prediction and compensation with residue if exist. */ 374a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Hubervoid InterMBPrediction(AVCCommonObj *video) 384a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber{ 394a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber AVCMacroblock *currMB = video->currMB; 404a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber AVCPictureData *currPic = video->currPic; 414a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber int mbPartIdx, subMbPartIdx; 424a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber int ref_idx; 434a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber int offset_MbPart_indx = 0; 444a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber int16 *mv; 454a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber uint32 x_pos, y_pos; 464a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber uint8 *curL, *curCb, *curCr; 474a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber uint8 *ref_l, *ref_Cb, *ref_Cr; 484a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber uint8 *predBlock, *predCb, *predCr; 494a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber int block_x, block_y, offset_x, offset_y, offsetP, offset; 504a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber int x_position = (video->mb_x << 4); 514a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber int y_position = (video->mb_y << 4); 524a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber int MbHeight, MbWidth, mbPartIdx_X, mbPartIdx_Y, offset_indx; 534a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber int picWidth = currPic->pitch; 544a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber int picHeight = currPic->height; 554a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber int16 *dataBlock; 564a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber uint32 cbp4x4; 574a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber uint32 tmp_word; 584a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 594a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber tmp_word = y_position * picWidth; 604a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber curL = currPic->Sl + tmp_word + x_position; 614a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber offset = (tmp_word >> 2) + (x_position >> 1); 624a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber curCb = currPic->Scb + offset; 634a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber curCr = currPic->Scr + offset; 644a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 654a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber#ifdef USE_PRED_BLOCK 664a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber predBlock = video->pred + 84; 674a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber predCb = video->pred + 452; 684a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber predCr = video->pred + 596; 694a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber#else 704a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber predBlock = curL; 714a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber predCb = curCb; 724a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber predCr = curCr; 734a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber#endif 744a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 754a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber GetMotionVectorPredictor(video, false); 764a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 774a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber for (mbPartIdx = 0; mbPartIdx < currMB->NumMbPart; mbPartIdx++) 784a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 794a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber MbHeight = currMB->SubMbPartHeight[mbPartIdx]; 804a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber MbWidth = currMB->SubMbPartWidth[mbPartIdx]; 814a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber mbPartIdx_X = ((mbPartIdx + offset_MbPart_indx) & 1); 824a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber mbPartIdx_Y = (mbPartIdx + offset_MbPart_indx) >> 1; 834a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber ref_idx = currMB->ref_idx_L0[(mbPartIdx_Y << 1) + mbPartIdx_X]; 844a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber offset_indx = 0; 854a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 864a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber ref_l = video->RefPicList0[ref_idx]->Sl; 874a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber ref_Cb = video->RefPicList0[ref_idx]->Scb; 884a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber ref_Cr = video->RefPicList0[ref_idx]->Scr; 894a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 904a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber for (subMbPartIdx = 0; subMbPartIdx < currMB->NumSubMbPart[mbPartIdx]; subMbPartIdx++) 914a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 924a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber block_x = (mbPartIdx_X << 1) + ((subMbPartIdx + offset_indx) & 1); // check this 934a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber block_y = (mbPartIdx_Y << 1) + (((subMbPartIdx + offset_indx) >> 1) & 1); 944a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber mv = (int16*)(currMB->mvL0 + block_x + (block_y << 2)); 954a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber offset_x = x_position + (block_x << 2); 964a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber offset_y = y_position + (block_y << 2); 974a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber x_pos = (offset_x << 2) + *mv++; /*quarter pel */ 984a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber y_pos = (offset_y << 2) + *mv; /*quarter pel */ 994a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 1004a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber //offset = offset_y * currPic->width; 1014a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber //offsetC = (offset >> 2) + (offset_x >> 1); 1024a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber#ifdef USE_PRED_BLOCK 1034a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber offsetP = (block_y * 80) + (block_x << 2); 1044a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber LumaMotionComp(ref_l, picWidth, picHeight, x_pos, y_pos, 1054a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /*comp_Sl + offset + offset_x,*/ 1064a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber predBlock + offsetP, 20, MbWidth, MbHeight); 1074a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber#else 1084a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber offsetP = (block_y << 2) * picWidth + (block_x << 2); 1094a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber LumaMotionComp(ref_l, picWidth, picHeight, x_pos, y_pos, 1104a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /*comp_Sl + offset + offset_x,*/ 1114a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber predBlock + offsetP, picWidth, MbWidth, MbHeight); 1124a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber#endif 1134a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 1144a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber#ifdef USE_PRED_BLOCK 1154a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber offsetP = (block_y * 24) + (block_x << 1); 1164a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber ChromaMotionComp(ref_Cb, picWidth >> 1, picHeight >> 1, x_pos, y_pos, 1174a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /*comp_Scb + offsetC,*/ 1184a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber predCb + offsetP, 12, MbWidth >> 1, MbHeight >> 1); 1194a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber ChromaMotionComp(ref_Cr, picWidth >> 1, picHeight >> 1, x_pos, y_pos, 1204a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /*comp_Scr + offsetC,*/ 1214a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber predCr + offsetP, 12, MbWidth >> 1, MbHeight >> 1); 1224a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber#else 1234a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber offsetP = (block_y * picWidth) + (block_x << 1); 1244a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber ChromaMotionComp(ref_Cb, picWidth >> 1, picHeight >> 1, x_pos, y_pos, 1254a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /*comp_Scb + offsetC,*/ 1264a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber predCb + offsetP, picWidth >> 1, MbWidth >> 1, MbHeight >> 1); 1274a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber ChromaMotionComp(ref_Cr, picWidth >> 1, picHeight >> 1, x_pos, y_pos, 1284a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /*comp_Scr + offsetC,*/ 1294a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber predCr + offsetP, picWidth >> 1, MbWidth >> 1, MbHeight >> 1); 1304a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber#endif 1314a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 1324a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber offset_indx = currMB->SubMbPartWidth[mbPartIdx] >> 3; 1334a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 1344a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber offset_MbPart_indx = currMB->MbPartWidth >> 4; 1354a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 1364a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 1374a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /* used in decoder, used to be if(!encFlag) */ 1384a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 1394a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /* transform in raster scan order */ 1404a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber dataBlock = video->block; 1414a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber cbp4x4 = video->cbp4x4; 1424a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /* luma */ 1434a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber for (block_y = 4; block_y > 0; block_y--) 1444a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 1454a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber for (block_x = 4; block_x > 0; block_x--) 1464a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 1474a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber#ifdef USE_PRED_BLOCK 1484a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber if (cbp4x4&1) 1494a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 1504a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber itrans(dataBlock, predBlock, predBlock, 20); 1514a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 1524a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber#else 1534a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber if (cbp4x4&1) 1544a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 1554a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber itrans(dataBlock, curL, curL, picWidth); 1564a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 1574a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber#endif 1584a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber cbp4x4 >>= 1; 1594a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber dataBlock += 4; 1604a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber#ifdef USE_PRED_BLOCK 1614a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber predBlock += 4; 1624a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber#else 1634a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber curL += 4; 1644a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber#endif 1654a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 1664a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber dataBlock += 48; 1674a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber#ifdef USE_PRED_BLOCK 1684a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber predBlock += 64; 1694a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber#else 1704a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber curL += ((picWidth << 2) - 16); 1714a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber#endif 1724a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 1734a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 1744a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /* chroma */ 1754a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber picWidth = (picWidth >> 1); 1764a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber for (block_y = 2; block_y > 0; block_y--) 1774a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 1784a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber for (block_x = 2; block_x > 0; block_x--) 1794a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 1804a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber#ifdef USE_PRED_BLOCK 1814a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber if (cbp4x4&1) 1824a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 1834a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber ictrans(dataBlock, predCb, predCb, 12); 1844a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 1854a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber#else 1864a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber if (cbp4x4&1) 1874a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 1884a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber ictrans(dataBlock, curCb, curCb, picWidth); 1894a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 1904a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber#endif 1914a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber cbp4x4 >>= 1; 1924a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber dataBlock += 4; 1934a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber#ifdef USE_PRED_BLOCK 1944a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber predCb += 4; 1954a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber#else 1964a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber curCb += 4; 1974a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber#endif 1984a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 1994a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber for (block_x = 2; block_x > 0; block_x--) 2004a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 2014a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber#ifdef USE_PRED_BLOCK 2024a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber if (cbp4x4&1) 2034a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 2044a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber ictrans(dataBlock, predCr, predCr, 12); 2054a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 2064a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber#else 2074a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber if (cbp4x4&1) 2084a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 2094a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber ictrans(dataBlock, curCr, curCr, picWidth); 2104a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 2114a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber#endif 2124a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber cbp4x4 >>= 1; 2134a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber dataBlock += 4; 2144a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber#ifdef USE_PRED_BLOCK 2154a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber predCr += 4; 2164a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber#else 2174a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber curCr += 4; 2184a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber#endif 2194a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 2204a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber dataBlock += 48; 2214a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber#ifdef USE_PRED_BLOCK 2224a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber predCb += 40; 2234a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber predCr += 40; 2244a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber#else 2254a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber curCb += ((picWidth << 2) - 8); 2264a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber curCr += ((picWidth << 2) - 8); 2274a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber#endif 2284a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 2294a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 2304a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber#ifdef MB_BASED_DEBLOCK 2314a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber SaveNeighborForIntraPred(video, offset); 2324a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber#endif 2334a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 2344a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber return ; 2354a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber} 2364a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 2374a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 2384a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber/* preform the actual motion comp here */ 2394a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Hubervoid LumaMotionComp(uint8 *ref, int picwidth, int picheight, 2404a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber int x_pos, int y_pos, 2414a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber uint8 *pred, int pred_pitch, 2424a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber int blkwidth, int blkheight) 2434a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber{ 2444a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber int dx, dy; 2454a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber uint8 temp[24][24]; /* for padding, make the size multiple of 4 for packing */ 2464a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber int temp2[21][21]; /* for intermediate results */ 2474a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber uint8 *ref2; 2484a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 2494a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber dx = x_pos & 3; 2504a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber dy = y_pos & 3; 2514a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber x_pos = x_pos >> 2; /* round it to full-pel resolution */ 2524a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber y_pos = y_pos >> 2; 2534a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 2544a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /* perform actual motion compensation */ 2554a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber if (dx == 0 && dy == 0) 2564a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { /* fullpel position *//* G */ 2574a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber if (x_pos >= 0 && x_pos + blkwidth <= picwidth && y_pos >= 0 && y_pos + blkheight <= picheight) 2584a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 2594a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber ref += y_pos * picwidth + x_pos; 2604a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber FullPelMC(ref, picwidth, pred, pred_pitch, blkwidth, blkheight); 2614a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 2624a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber else 2634a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 2644a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber CreatePad(ref, picwidth, picheight, x_pos, y_pos, &temp[0][0], blkwidth, blkheight); 2654a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber FullPelMC(&temp[0][0], 24, pred, pred_pitch, blkwidth, blkheight); 2664a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 2674a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 2684a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } /* other positions */ 2694a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber else if (dy == 0) 2704a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { /* no vertical interpolation *//* a,b,c*/ 2714a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 2724a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber if (x_pos - 2 >= 0 && x_pos + 3 + blkwidth <= picwidth && y_pos >= 0 && y_pos + blkheight <= picheight) 2734a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 2744a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber ref += y_pos * picwidth + x_pos; 2754a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 2764a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber HorzInterp1MC(ref, picwidth, pred, pred_pitch, blkwidth, blkheight, dx); 2774a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 2784a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber else /* need padding */ 2794a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 2804a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber CreatePad(ref, picwidth, picheight, x_pos - 2, y_pos, &temp[0][0], blkwidth + 5, blkheight); 2814a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 2824a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber HorzInterp1MC(&temp[0][2], 24, pred, pred_pitch, blkwidth, blkheight, dx); 2834a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 2844a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 2854a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber else if (dx == 0) 2864a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { /*no horizontal interpolation *//* d,h,n */ 2874a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 2884a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber if (x_pos >= 0 && x_pos + blkwidth <= picwidth && y_pos - 2 >= 0 && y_pos + 3 + blkheight <= picheight) 2894a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 2904a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber ref += y_pos * picwidth + x_pos; 2914a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 2924a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber VertInterp1MC(ref, picwidth, pred, pred_pitch, blkwidth, blkheight, dy); 2934a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 2944a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber else /* need padding */ 2954a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 2964a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber CreatePad(ref, picwidth, picheight, x_pos, y_pos - 2, &temp[0][0], blkwidth, blkheight + 5); 2974a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 2984a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber VertInterp1MC(&temp[2][0], 24, pred, pred_pitch, blkwidth, blkheight, dy); 2994a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 3004a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 3014a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber else if (dy == 2) 3024a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { /* horizontal cross *//* i, j, k */ 3034a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 3044a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber if (x_pos - 2 >= 0 && x_pos + 3 + blkwidth <= picwidth && y_pos - 2 >= 0 && y_pos + 3 + blkheight <= picheight) 3054a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 3064a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber ref += y_pos * picwidth + x_pos - 2; /* move to the left 2 pixels */ 3074a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 3084a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber VertInterp2MC(ref, picwidth, &temp2[0][0], 21, blkwidth + 5, blkheight); 3094a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 3104a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber HorzInterp2MC(&temp2[0][2], 21, pred, pred_pitch, blkwidth, blkheight, dx); 3114a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 3124a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber else /* need padding */ 3134a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 3144a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber CreatePad(ref, picwidth, picheight, x_pos - 2, y_pos - 2, &temp[0][0], blkwidth + 5, blkheight + 5); 3154a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 3164a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber VertInterp2MC(&temp[2][0], 24, &temp2[0][0], 21, blkwidth + 5, blkheight); 3174a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 3184a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber HorzInterp2MC(&temp2[0][2], 21, pred, pred_pitch, blkwidth, blkheight, dx); 3194a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 3204a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 3214a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber else if (dx == 2) 3224a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { /* vertical cross */ /* f,q */ 3234a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 3244a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber if (x_pos - 2 >= 0 && x_pos + 3 + blkwidth <= picwidth && y_pos - 2 >= 0 && y_pos + 3 + blkheight <= picheight) 3254a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 3264a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber ref += (y_pos - 2) * picwidth + x_pos; /* move to up 2 lines */ 3274a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 3284a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber HorzInterp3MC(ref, picwidth, &temp2[0][0], 21, blkwidth, blkheight + 5); 3294a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber VertInterp3MC(&temp2[2][0], 21, pred, pred_pitch, blkwidth, blkheight, dy); 3304a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 3314a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber else /* need padding */ 3324a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 3334a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber CreatePad(ref, picwidth, picheight, x_pos - 2, y_pos - 2, &temp[0][0], blkwidth + 5, blkheight + 5); 3344a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber HorzInterp3MC(&temp[0][2], 24, &temp2[0][0], 21, blkwidth, blkheight + 5); 3354a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber VertInterp3MC(&temp2[2][0], 21, pred, pred_pitch, blkwidth, blkheight, dy); 3364a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 3374a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 3384a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber else 3394a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { /* diagonal *//* e,g,p,r */ 3404a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 3414a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber if (x_pos - 2 >= 0 && x_pos + 3 + (dx / 2) + blkwidth <= picwidth && 3424a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber y_pos - 2 >= 0 && y_pos + 3 + blkheight + (dy / 2) <= picheight) 3434a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 3444a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber ref2 = ref + (y_pos + (dy / 2)) * picwidth + x_pos; 3454a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 3464a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber ref += (y_pos * picwidth) + x_pos + (dx / 2); 3474a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 3484a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber DiagonalInterpMC(ref2, ref, picwidth, pred, pred_pitch, blkwidth, blkheight); 3494a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 3504a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber else /* need padding */ 3514a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 3524a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber CreatePad(ref, picwidth, picheight, x_pos - 2, y_pos - 2, &temp[0][0], blkwidth + 5 + (dx / 2), blkheight + 5 + (dy / 2)); 3534a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 3544a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber ref2 = &temp[2 + (dy/2)][2]; 3554a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 3564a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber ref = &temp[2][2 + (dx/2)]; 3574a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 3584a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber DiagonalInterpMC(ref2, ref, 24, pred, pred_pitch, blkwidth, blkheight); 3594a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 3604a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 3614a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 3624a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber return ; 3634a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber} 3644a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 3654a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Hubervoid CreateAlign(uint8 *ref, int picwidth, int y_pos, 3664a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber uint8 *out, int blkwidth, int blkheight) 3674a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber{ 3684a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber int i, j; 3694a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber int offset, out_offset; 3704a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber uint32 prev_pix, result, pix1, pix2, pix4; 3714a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 3724a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber out_offset = 24 - blkwidth; 3734a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 3744a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber //switch(x_pos&0x3){ 3754a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber switch (((uint32)ref)&0x3) 3764a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 3774a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber case 1: 3784a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber ref += y_pos * picwidth; 3794a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber offset = picwidth - blkwidth - 3; 3804a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber for (j = 0; j < blkheight; j++) 3814a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 3824a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber pix1 = *ref++; 3834a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber pix2 = *((uint16*)ref); 3844a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber ref += 2; 3854a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (pix2 << 8) | pix1; 3864a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 3874a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber for (i = 3; i < blkwidth; i += 4) 3884a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 3894a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber pix4 = *((uint32*)ref); 3904a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber ref += 4; 3914a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber prev_pix = (pix4 << 24) & 0xFF000000; /* mask out byte belong to previous word */ 3924a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result |= prev_pix; 3934a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber *((uint32*)out) = result; /* write 4 bytes */ 3944a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber out += 4; 3954a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = pix4 >> 8; /* for the next loop */ 3964a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 3974a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber ref += offset; 3984a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber out += out_offset; 3994a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 4004a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber break; 4014a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber case 2: 4024a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber ref += y_pos * picwidth; 4034a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber offset = picwidth - blkwidth - 2; 4044a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber for (j = 0; j < blkheight; j++) 4054a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 4064a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = *((uint16*)ref); 4074a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber ref += 2; 4084a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber for (i = 2; i < blkwidth; i += 4) 4094a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 4104a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber pix4 = *((uint32*)ref); 4114a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber ref += 4; 4124a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber prev_pix = (pix4 << 16) & 0xFFFF0000; /* mask out byte belong to previous word */ 4134a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result |= prev_pix; 4144a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber *((uint32*)out) = result; /* write 4 bytes */ 4154a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber out += 4; 4164a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = pix4 >> 16; /* for the next loop */ 4174a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 4184a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber ref += offset; 4194a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber out += out_offset; 4204a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 4214a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber break; 4224a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber case 3: 4234a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber ref += y_pos * picwidth; 4244a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber offset = picwidth - blkwidth - 1; 4254a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber for (j = 0; j < blkheight; j++) 4264a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 4274a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = *ref++; 4284a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber for (i = 1; i < blkwidth; i += 4) 4294a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 4304a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber pix4 = *((uint32*)ref); 4314a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber ref += 4; 4324a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber prev_pix = (pix4 << 8) & 0xFFFFFF00; /* mask out byte belong to previous word */ 4334a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result |= prev_pix; 4344a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber *((uint32*)out) = result; /* write 4 bytes */ 4354a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber out += 4; 4364a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = pix4 >> 24; /* for the next loop */ 4374a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 4384a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber ref += offset; 4394a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber out += out_offset; 4404a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 4414a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber break; 4424a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 4434a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber} 4444a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 4454a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Hubervoid CreatePad(uint8 *ref, int picwidth, int picheight, int x_pos, int y_pos, 4464a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber uint8 *out, int blkwidth, int blkheight) 4474a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber{ 4484a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber int x_inc0, x_mid; 4494a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber int y_inc, y_inc0, y_inc1, y_mid; 4504a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber int i, j; 4514a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber int offset; 4524a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 4534a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber if (x_pos < 0) 4544a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 4554a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber x_inc0 = 0; /* increment for the first part */ 4564a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber x_mid = ((blkwidth + x_pos > 0) ? -x_pos : blkwidth); /* stopping point */ 4574a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber x_pos = 0; 4584a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 4594a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber else if (x_pos + blkwidth > picwidth) 4604a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 4614a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber x_inc0 = 1; /* increasing */ 4624a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber x_mid = ((picwidth > x_pos) ? picwidth - x_pos - 1 : 0); /* clip negative to zero, encode fool proof! */ 4634a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 4644a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber else /* normal case */ 4654a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 4664a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber x_inc0 = 1; 4674a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber x_mid = blkwidth; /* just one run */ 4684a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 4694a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 4704a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 4714a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /* boundary for y_pos, taking the result from x_pos into account */ 4724a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber if (y_pos < 0) 4734a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 4744a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber y_inc0 = (x_inc0 ? - x_mid : -blkwidth + x_mid); /* offset depending on x_inc1 and x_inc0 */ 4754a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber y_inc1 = picwidth + y_inc0; 4764a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber y_mid = ((blkheight + y_pos > 0) ? -y_pos : blkheight); /* clip to prevent memory corruption */ 4774a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber y_pos = 0; 4784a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 4794a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber else if (y_pos + blkheight > picheight) 4804a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 4814a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber y_inc1 = (x_inc0 ? - x_mid : -blkwidth + x_mid); /* saturate */ 4824a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber y_inc0 = picwidth + y_inc1; /* increasing */ 4834a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber y_mid = ((picheight > y_pos) ? picheight - 1 - y_pos : 0); 4844a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 4854a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber else /* normal case */ 4864a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 4874a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber y_inc1 = (x_inc0 ? - x_mid : -blkwidth + x_mid); 4884a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber y_inc0 = picwidth + y_inc1; 4894a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber y_mid = blkheight; 4904a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 4914a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 4924a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /* clip y_pos and x_pos */ 4934a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber if (y_pos > picheight - 1) y_pos = picheight - 1; 4944a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber if (x_pos > picwidth - 1) x_pos = picwidth - 1; 4954a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 4964a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber ref += y_pos * picwidth + x_pos; 4974a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 4984a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber y_inc = y_inc0; /* start with top half */ 4994a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 5004a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber offset = 24 - blkwidth; /* to use in offset out */ 5014a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber blkwidth -= x_mid; /* to use in the loop limit */ 5024a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 5034a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber if (x_inc0 == 0) 5044a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 5054a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber for (j = 0; j < blkheight; j++) 5064a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 5074a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber if (j == y_mid) /* put a check here to reduce the code size (for unrolling the loop) */ 5084a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 5094a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber y_inc = y_inc1; /* switch to lower half */ 5104a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 5114a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber for (i = x_mid; i > 0; i--) /* first or third quarter */ 5124a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 5134a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber *out++ = *ref; 5144a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 5154a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber for (i = blkwidth; i > 0; i--) /* second or fourth quarter */ 5164a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 5174a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber *out++ = *ref++; 5184a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 5194a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber out += offset; 5204a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber ref += y_inc; 5214a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 5224a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 5234a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber else 5244a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 5254a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber for (j = 0; j < blkheight; j++) 5264a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 5274a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber if (j == y_mid) /* put a check here to reduce the code size (for unrolling the loop) */ 5284a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 5294a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber y_inc = y_inc1; /* switch to lower half */ 5304a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 5314a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber for (i = x_mid; i > 0; i--) /* first or third quarter */ 5324a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 5334a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber *out++ = *ref++; 5344a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 5354a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber for (i = blkwidth; i > 0; i--) /* second or fourth quarter */ 5364a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 5374a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber *out++ = *ref; 5384a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 5394a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber out += offset; 5404a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber ref += y_inc; 5414a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 5424a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 5434a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 5444a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber return ; 5454a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber} 5464a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 5474a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Hubervoid HorzInterp1MC(uint8 *in, int inpitch, uint8 *out, int outpitch, 5484a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber int blkwidth, int blkheight, int dx) 5494a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber{ 5504a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber uint8 *p_ref; 5514a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber uint32 *p_cur; 5524a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber uint32 tmp, pkres; 5534a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber int result, curr_offset, ref_offset; 5544a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber int j; 5554a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber int32 r0, r1, r2, r3, r4, r5; 5564a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber int32 r13, r6; 5574a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 5584a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber p_cur = (uint32*)out; /* assume it's word aligned */ 5594a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber curr_offset = (outpitch - blkwidth) >> 2; 5604a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber p_ref = in; 5614a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber ref_offset = inpitch - blkwidth; 5624a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 5634a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber if (dx&1) 5644a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 5654a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber dx = ((dx >> 1) ? -3 : -4); /* use in 3/4 pel */ 5664a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber p_ref -= 2; 5674a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r13 = 0; 5684a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber for (j = blkheight; j > 0; j--) 5694a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 5704a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber tmp = (uint32)(p_ref + blkwidth); 5714a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 = p_ref[0]; 5724a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 = p_ref[2]; 5734a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 |= (r1 << 16); /* 0,c,0,a */ 5744a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 = p_ref[1]; 5754a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r2 = p_ref[3]; 5764a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 |= (r2 << 16); /* 0,d,0,b */ 5774a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber while ((uint32)p_ref < tmp) 5784a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 5794a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r2 = *(p_ref += 4); /* move pointer to e */ 5804a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r3 = p_ref[2]; 5814a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r2 |= (r3 << 16); /* 0,g,0,e */ 5824a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r3 = p_ref[1]; 5834a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r4 = p_ref[3]; 5844a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r3 |= (r4 << 16); /* 0,h,0,f */ 5854a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 5864a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r4 = r0 + r3; /* c+h, a+f */ 5874a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r5 = r0 + r1; /* c+d, a+b */ 5884a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r6 = r2 + r3; /* g+h, e+f */ 5894a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r5 >>= 16; 5904a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r5 |= (r6 << 16); /* e+f, c+d */ 5914a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r4 += r5 * 20; /* c+20*e+20*f+h, a+20*c+20*d+f */ 5924a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r4 += 0x100010; /* +16, +16 */ 5934a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r5 = r1 + r2; /* d+g, b+e */ 5944a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r4 -= r5 * 5; /* c-5*d+20*e+20*f-5*g+h, a-5*b+20*c+20*d-5*e+f */ 5954a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r4 >>= 5; 5964a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r13 |= r4; /* check clipping */ 5974a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 5984a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r5 = p_ref[dx+2]; 5994a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r6 = p_ref[dx+4]; 6004a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r5 |= (r6 << 16); 6014a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r4 += r5; 6024a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r4 += 0x10001; 6034a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r4 = (r4 >> 1) & 0xFF00FF; 6044a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 6054a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r5 = p_ref[4]; /* i */ 6064a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r6 = (r5 << 16); 6074a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r5 = r6 | (r2 >> 16);/* 0,i,0,g */ 6084a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r5 += r1; /* d+i, b+g */ /* r5 not free */ 6094a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 >>= 16; 6104a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 |= (r3 << 16); /* 0,f,0,d */ /* r1 has changed */ 6114a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 += r2; /* f+g, d+e */ 6124a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r5 += 20 * r1; /* d+20f+20g+i, b+20d+20e+g */ 6134a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 >>= 16; 6144a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 |= (r2 << 16); /* 0,e,0,c */ /* r0 has changed */ 6154a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 += r3; /* e+h, c+f */ 6164a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r5 += 0x100010; /* 16,16 */ 6174a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r5 -= r0 * 5; /* d-5e+20f+20g-5h+i, b-5c+20d+20e-5f+g */ 6184a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r5 >>= 5; 6194a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r13 |= r5; /* check clipping */ 6204a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 6214a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 = p_ref[dx+3]; 6224a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 = p_ref[dx+5]; 6234a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 |= (r1 << 16); 6244a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r5 += r0; 6254a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r5 += 0x10001; 6264a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r5 = (r5 >> 1) & 0xFF00FF; 6274a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 6284a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r4 |= (r5 << 8); /* pack them together */ 6294a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber *p_cur++ = r4; 6304a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 = r3; 6314a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 = r2; 6324a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 6334a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber p_cur += curr_offset; /* move to the next line */ 6344a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber p_ref += ref_offset; /* ref_offset = inpitch-blkwidth; */ 6354a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 6364a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber if (r13&0xFF000700) /* need clipping */ 6374a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 6384a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /* move back to the beginning of the line */ 6394a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber p_ref -= (ref_offset + blkwidth); /* input */ 6404a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber p_cur -= (outpitch >> 2); 6414a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 6424a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber tmp = (uint32)(p_ref + blkwidth); 6434a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber for (; (uint32)p_ref < tmp;) 6444a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 6454a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 6464a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 = *p_ref++; 6474a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 = *p_ref++; 6484a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r2 = *p_ref++; 6494a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r3 = *p_ref++; 6504a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r4 = *p_ref++; 6514a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /* first pixel */ 6524a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r5 = *p_ref++; 6534a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (r0 + r5); 6544a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 = (r1 + r4); 6554a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result -= (r0 * 5);//result -= r0; result -= (r0<<2); 6564a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 = (r2 + r3); 6574a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result += (r0 * 20);//result += (r0<<4); result += (r0<<2); 6584a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (result + 16) >> 5; 6594a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber CLIP_RESULT(result) 6604a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /* 3/4 pel, no need to clip */ 6614a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (result + p_ref[dx] + 1); 6624a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber pkres = (result >> 1) ; 6634a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /* second pixel */ 6644a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 = *p_ref++; 6654a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (r1 + r0); 6664a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 = (r2 + r5); 6674a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result -= (r1 * 5);//result -= r1; result -= (r1<<2); 6684a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 = (r3 + r4); 6694a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result += (r1 * 20);//result += (r1<<4); result += (r1<<2); 6704a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (result + 16) >> 5; 6714a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber CLIP_RESULT(result) 6724a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /* 3/4 pel, no need to clip */ 6734a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (result + p_ref[dx] + 1); 6744a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (result >> 1); 6754a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber pkres |= (result << 8); 6764a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /* third pixel */ 6774a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 = *p_ref++; 6784a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (r2 + r1); 6794a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r2 = (r3 + r0); 6804a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result -= (r2 * 5);//result -= r2; result -= (r2<<2); 6814a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r2 = (r4 + r5); 6824a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result += (r2 * 20);//result += (r2<<4); result += (r2<<2); 6834a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (result + 16) >> 5; 6844a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber CLIP_RESULT(result) 6854a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /* 3/4 pel, no need to clip */ 6864a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (result + p_ref[dx] + 1); 6874a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (result >> 1); 6884a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber pkres |= (result << 16); 6894a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /* fourth pixel */ 6904a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r2 = *p_ref++; 6914a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (r3 + r2); 6924a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r3 = (r4 + r1); 6934a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result -= (r3 * 5);//result -= r3; result -= (r3<<2); 6944a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r3 = (r5 + r0); 6954a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result += (r3 * 20);//result += (r3<<4); result += (r3<<2); 6964a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (result + 16) >> 5; 6974a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber CLIP_RESULT(result) 6984a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /* 3/4 pel, no need to clip */ 6994a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (result + p_ref[dx] + 1); 7004a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (result >> 1); 7014a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber pkres |= (result << 24); 7024a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber *p_cur++ = pkres; /* write 4 pixels */ 7034a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber p_ref -= 5; /* offset back to the middle of filter */ 7044a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 7054a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber p_cur += curr_offset; /* move to the next line */ 7064a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber p_ref += ref_offset; /* move to the next line */ 7074a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 7084a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 7094a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 7104a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber else 7114a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 7124a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber p_ref -= 2; 7134a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r13 = 0; 7144a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber for (j = blkheight; j > 0; j--) 7154a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 7164a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber tmp = (uint32)(p_ref + blkwidth); 7174a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 = p_ref[0]; 7184a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 = p_ref[2]; 7194a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 |= (r1 << 16); /* 0,c,0,a */ 7204a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 = p_ref[1]; 7214a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r2 = p_ref[3]; 7224a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 |= (r2 << 16); /* 0,d,0,b */ 7234a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber while ((uint32)p_ref < tmp) 7244a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 7254a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r2 = *(p_ref += 4); /* move pointer to e */ 7264a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r3 = p_ref[2]; 7274a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r2 |= (r3 << 16); /* 0,g,0,e */ 7284a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r3 = p_ref[1]; 7294a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r4 = p_ref[3]; 7304a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r3 |= (r4 << 16); /* 0,h,0,f */ 7314a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 7324a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r4 = r0 + r3; /* c+h, a+f */ 7334a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r5 = r0 + r1; /* c+d, a+b */ 7344a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r6 = r2 + r3; /* g+h, e+f */ 7354a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r5 >>= 16; 7364a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r5 |= (r6 << 16); /* e+f, c+d */ 7374a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r4 += r5 * 20; /* c+20*e+20*f+h, a+20*c+20*d+f */ 7384a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r4 += 0x100010; /* +16, +16 */ 7394a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r5 = r1 + r2; /* d+g, b+e */ 7404a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r4 -= r5 * 5; /* c-5*d+20*e+20*f-5*g+h, a-5*b+20*c+20*d-5*e+f */ 7414a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r4 >>= 5; 7424a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r13 |= r4; /* check clipping */ 7434a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r4 &= 0xFF00FF; /* mask */ 7444a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 7454a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r5 = p_ref[4]; /* i */ 7464a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r6 = (r5 << 16); 7474a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r5 = r6 | (r2 >> 16);/* 0,i,0,g */ 7484a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r5 += r1; /* d+i, b+g */ /* r5 not free */ 7494a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 >>= 16; 7504a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 |= (r3 << 16); /* 0,f,0,d */ /* r1 has changed */ 7514a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 += r2; /* f+g, d+e */ 7524a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r5 += 20 * r1; /* d+20f+20g+i, b+20d+20e+g */ 7534a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 >>= 16; 7544a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 |= (r2 << 16); /* 0,e,0,c */ /* r0 has changed */ 7554a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 += r3; /* e+h, c+f */ 7564a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r5 += 0x100010; /* 16,16 */ 7574a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r5 -= r0 * 5; /* d-5e+20f+20g-5h+i, b-5c+20d+20e-5f+g */ 7584a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r5 >>= 5; 7594a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r13 |= r5; /* check clipping */ 7604a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r5 &= 0xFF00FF; /* mask */ 7614a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 7624a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r4 |= (r5 << 8); /* pack them together */ 7634a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber *p_cur++ = r4; 7644a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 = r3; 7654a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 = r2; 7664a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 7674a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber p_cur += curr_offset; /* move to the next line */ 7684a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber p_ref += ref_offset; /* ref_offset = inpitch-blkwidth; */ 7694a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 7704a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber if (r13&0xFF000700) /* need clipping */ 7714a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 7724a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /* move back to the beginning of the line */ 7734a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber p_ref -= (ref_offset + blkwidth); /* input */ 7744a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber p_cur -= (outpitch >> 2); 7754a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 7764a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber tmp = (uint32)(p_ref + blkwidth); 7774a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber for (; (uint32)p_ref < tmp;) 7784a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 7794a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 7804a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 = *p_ref++; 7814a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 = *p_ref++; 7824a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r2 = *p_ref++; 7834a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r3 = *p_ref++; 7844a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r4 = *p_ref++; 7854a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /* first pixel */ 7864a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r5 = *p_ref++; 7874a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (r0 + r5); 7884a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 = (r1 + r4); 7894a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result -= (r0 * 5);//result -= r0; result -= (r0<<2); 7904a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 = (r2 + r3); 7914a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result += (r0 * 20);//result += (r0<<4); result += (r0<<2); 7924a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (result + 16) >> 5; 7934a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber CLIP_RESULT(result) 7944a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber pkres = result; 7954a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /* second pixel */ 7964a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 = *p_ref++; 7974a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (r1 + r0); 7984a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 = (r2 + r5); 7994a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result -= (r1 * 5);//result -= r1; result -= (r1<<2); 8004a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 = (r3 + r4); 8014a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result += (r1 * 20);//result += (r1<<4); result += (r1<<2); 8024a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (result + 16) >> 5; 8034a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber CLIP_RESULT(result) 8044a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber pkres |= (result << 8); 8054a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /* third pixel */ 8064a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 = *p_ref++; 8074a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (r2 + r1); 8084a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r2 = (r3 + r0); 8094a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result -= (r2 * 5);//result -= r2; result -= (r2<<2); 8104a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r2 = (r4 + r5); 8114a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result += (r2 * 20);//result += (r2<<4); result += (r2<<2); 8124a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (result + 16) >> 5; 8134a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber CLIP_RESULT(result) 8144a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber pkres |= (result << 16); 8154a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /* fourth pixel */ 8164a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r2 = *p_ref++; 8174a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (r3 + r2); 8184a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r3 = (r4 + r1); 8194a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result -= (r3 * 5);//result -= r3; result -= (r3<<2); 8204a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r3 = (r5 + r0); 8214a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result += (r3 * 20);//result += (r3<<4); result += (r3<<2); 8224a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (result + 16) >> 5; 8234a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber CLIP_RESULT(result) 8244a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber pkres |= (result << 24); 8254a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber *p_cur++ = pkres; /* write 4 pixels */ 8264a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber p_ref -= 5; 8274a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 8284a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber p_cur += curr_offset; /* move to the next line */ 8294a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber p_ref += ref_offset; 8304a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 8314a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 8324a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 8334a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 8344a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber return ; 8354a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber} 8364a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 8374a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Hubervoid HorzInterp2MC(int *in, int inpitch, uint8 *out, int outpitch, 8384a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber int blkwidth, int blkheight, int dx) 8394a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber{ 8404a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber int *p_ref; 8414a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber uint32 *p_cur; 8424a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber uint32 tmp, pkres; 8434a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber int result, result2, curr_offset, ref_offset; 8444a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber int j, r0, r1, r2, r3, r4, r5; 8454a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 8464a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber p_cur = (uint32*)out; /* assume it's word aligned */ 8474a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber curr_offset = (outpitch - blkwidth) >> 2; 8484a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber p_ref = in; 8494a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber ref_offset = inpitch - blkwidth; 8504a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 8514a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber if (dx&1) 8524a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 8534a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber dx = ((dx >> 1) ? -3 : -4); /* use in 3/4 pel */ 8544a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 8554a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber for (j = blkheight; j > 0 ; j--) 8564a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 8574a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber tmp = (uint32)(p_ref + blkwidth); 8584a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber for (; (uint32)p_ref < tmp;) 8594a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 8604a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 8614a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 = p_ref[-2]; 8624a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 = p_ref[-1]; 8634a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r2 = *p_ref++; 8644a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r3 = *p_ref++; 8654a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r4 = *p_ref++; 8664a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /* first pixel */ 8674a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r5 = *p_ref++; 8684a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (r0 + r5); 8694a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 = (r1 + r4); 8704a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result -= (r0 * 5);//result -= r0; result -= (r0<<2); 8714a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 = (r2 + r3); 8724a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result += (r0 * 20);//result += (r0<<4); result += (r0<<2); 8734a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (result + 512) >> 10; 8744a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber CLIP_RESULT(result) 8754a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result2 = ((p_ref[dx] + 16) >> 5); 8764a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber CLIP_RESULT(result2) 8774a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /* 3/4 pel, no need to clip */ 8784a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (result + result2 + 1); 8794a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber pkres = (result >> 1); 8804a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /* second pixel */ 8814a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 = *p_ref++; 8824a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (r1 + r0); 8834a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 = (r2 + r5); 8844a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result -= (r1 * 5);//result -= r1; result -= (r1<<2); 8854a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 = (r3 + r4); 8864a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result += (r1 * 20);//result += (r1<<4); result += (r1<<2); 8874a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (result + 512) >> 10; 8884a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber CLIP_RESULT(result) 8894a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result2 = ((p_ref[dx] + 16) >> 5); 8904a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber CLIP_RESULT(result2) 8914a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /* 3/4 pel, no need to clip */ 8924a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (result + result2 + 1); 8934a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (result >> 1); 8944a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber pkres |= (result << 8); 8954a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /* third pixel */ 8964a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 = *p_ref++; 8974a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (r2 + r1); 8984a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r2 = (r3 + r0); 8994a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result -= (r2 * 5);//result -= r2; result -= (r2<<2); 9004a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r2 = (r4 + r5); 9014a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result += (r2 * 20);//result += (r2<<4); result += (r2<<2); 9024a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (result + 512) >> 10; 9034a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber CLIP_RESULT(result) 9044a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result2 = ((p_ref[dx] + 16) >> 5); 9054a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber CLIP_RESULT(result2) 9064a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /* 3/4 pel, no need to clip */ 9074a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (result + result2 + 1); 9084a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (result >> 1); 9094a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber pkres |= (result << 16); 9104a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /* fourth pixel */ 9114a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r2 = *p_ref++; 9124a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (r3 + r2); 9134a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r3 = (r4 + r1); 9144a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result -= (r3 * 5);//result -= r3; result -= (r3<<2); 9154a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r3 = (r5 + r0); 9164a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result += (r3 * 20);//result += (r3<<4); result += (r3<<2); 9174a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (result + 512) >> 10; 9184a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber CLIP_RESULT(result) 9194a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result2 = ((p_ref[dx] + 16) >> 5); 9204a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber CLIP_RESULT(result2) 9214a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /* 3/4 pel, no need to clip */ 9224a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (result + result2 + 1); 9234a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (result >> 1); 9244a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber pkres |= (result << 24); 9254a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber *p_cur++ = pkres; /* write 4 pixels */ 9264a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber p_ref -= 3; /* offset back to the middle of filter */ 9274a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 9284a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber p_cur += curr_offset; /* move to the next line */ 9294a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber p_ref += ref_offset; /* move to the next line */ 9304a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 9314a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 9324a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber else 9334a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 9344a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber for (j = blkheight; j > 0 ; j--) 9354a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 9364a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber tmp = (uint32)(p_ref + blkwidth); 9374a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber for (; (uint32)p_ref < tmp;) 9384a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 9394a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 9404a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 = p_ref[-2]; 9414a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 = p_ref[-1]; 9424a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r2 = *p_ref++; 9434a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r3 = *p_ref++; 9444a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r4 = *p_ref++; 9454a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /* first pixel */ 9464a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r5 = *p_ref++; 9474a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (r0 + r5); 9484a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 = (r1 + r4); 9494a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result -= (r0 * 5);//result -= r0; result -= (r0<<2); 9504a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 = (r2 + r3); 9514a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result += (r0 * 20);//result += (r0<<4); result += (r0<<2); 9524a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (result + 512) >> 10; 9534a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber CLIP_RESULT(result) 9544a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber pkres = result; 9554a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /* second pixel */ 9564a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 = *p_ref++; 9574a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (r1 + r0); 9584a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 = (r2 + r5); 9594a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result -= (r1 * 5);//result -= r1; result -= (r1<<2); 9604a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 = (r3 + r4); 9614a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result += (r1 * 20);//result += (r1<<4); result += (r1<<2); 9624a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (result + 512) >> 10; 9634a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber CLIP_RESULT(result) 9644a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber pkres |= (result << 8); 9654a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /* third pixel */ 9664a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 = *p_ref++; 9674a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (r2 + r1); 9684a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r2 = (r3 + r0); 9694a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result -= (r2 * 5);//result -= r2; result -= (r2<<2); 9704a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r2 = (r4 + r5); 9714a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result += (r2 * 20);//result += (r2<<4); result += (r2<<2); 9724a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (result + 512) >> 10; 9734a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber CLIP_RESULT(result) 9744a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber pkres |= (result << 16); 9754a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /* fourth pixel */ 9764a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r2 = *p_ref++; 9774a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (r3 + r2); 9784a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r3 = (r4 + r1); 9794a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result -= (r3 * 5);//result -= r3; result -= (r3<<2); 9804a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r3 = (r5 + r0); 9814a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result += (r3 * 20);//result += (r3<<4); result += (r3<<2); 9824a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (result + 512) >> 10; 9834a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber CLIP_RESULT(result) 9844a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber pkres |= (result << 24); 9854a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber *p_cur++ = pkres; /* write 4 pixels */ 9864a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber p_ref -= 3; /* offset back to the middle of filter */ 9874a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 9884a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber p_cur += curr_offset; /* move to the next line */ 9894a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber p_ref += ref_offset; /* move to the next line */ 9904a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 9914a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 9924a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 9934a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber return ; 9944a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber} 9954a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 9964a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Hubervoid HorzInterp3MC(uint8 *in, int inpitch, int *out, int outpitch, 9974a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber int blkwidth, int blkheight) 9984a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber{ 9994a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber uint8 *p_ref; 10004a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber int *p_cur; 10014a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber uint32 tmp; 10024a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber int result, curr_offset, ref_offset; 10034a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber int j, r0, r1, r2, r3, r4, r5; 10044a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 10054a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber p_cur = out; 10064a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber curr_offset = (outpitch - blkwidth); 10074a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber p_ref = in; 10084a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber ref_offset = inpitch - blkwidth; 10094a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 10104a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber for (j = blkheight; j > 0 ; j--) 10114a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 10124a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber tmp = (uint32)(p_ref + blkwidth); 10134a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber for (; (uint32)p_ref < tmp;) 10144a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 10154a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 10164a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 = p_ref[-2]; 10174a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 = p_ref[-1]; 10184a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r2 = *p_ref++; 10194a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r3 = *p_ref++; 10204a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r4 = *p_ref++; 10214a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /* first pixel */ 10224a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r5 = *p_ref++; 10234a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (r0 + r5); 10244a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 = (r1 + r4); 10254a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result -= (r0 * 5);//result -= r0; result -= (r0<<2); 10264a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 = (r2 + r3); 10274a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result += (r0 * 20);//result += (r0<<4); result += (r0<<2); 10284a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber *p_cur++ = result; 10294a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /* second pixel */ 10304a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 = *p_ref++; 10314a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (r1 + r0); 10324a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 = (r2 + r5); 10334a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result -= (r1 * 5);//result -= r1; result -= (r1<<2); 10344a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 = (r3 + r4); 10354a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result += (r1 * 20);//result += (r1<<4); result += (r1<<2); 10364a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber *p_cur++ = result; 10374a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /* third pixel */ 10384a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 = *p_ref++; 10394a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (r2 + r1); 10404a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r2 = (r3 + r0); 10414a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result -= (r2 * 5);//result -= r2; result -= (r2<<2); 10424a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r2 = (r4 + r5); 10434a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result += (r2 * 20);//result += (r2<<4); result += (r2<<2); 10444a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber *p_cur++ = result; 10454a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /* fourth pixel */ 10464a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r2 = *p_ref++; 10474a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (r3 + r2); 10484a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r3 = (r4 + r1); 10494a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result -= (r3 * 5);//result -= r3; result -= (r3<<2); 10504a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r3 = (r5 + r0); 10514a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result += (r3 * 20);//result += (r3<<4); result += (r3<<2); 10524a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber *p_cur++ = result; 10534a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber p_ref -= 3; /* move back to the middle of the filter */ 10544a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 10554a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber p_cur += curr_offset; /* move to the next line */ 10564a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber p_ref += ref_offset; 10574a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 10584a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 10594a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber return ; 10604a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber} 10614a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Hubervoid VertInterp1MC(uint8 *in, int inpitch, uint8 *out, int outpitch, 10624a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber int blkwidth, int blkheight, int dy) 10634a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber{ 10644a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber uint8 *p_cur, *p_ref; 10654a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber uint32 tmp; 10664a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber int result, curr_offset, ref_offset; 10674a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber int j, i; 10684a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber int32 r0, r1, r2, r3, r4, r5, r6, r7, r8, r13; 10694a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber uint8 tmp_in[24][24]; 10704a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 10714a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /* not word-aligned */ 10724a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber if (((uint32)in)&0x3) 10734a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 10744a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber CreateAlign(in, inpitch, -2, &tmp_in[0][0], blkwidth, blkheight + 5); 10754a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber in = &tmp_in[2][0]; 10764a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber inpitch = 24; 10774a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 10784a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber p_cur = out; 10794a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber curr_offset = 1 - outpitch * (blkheight - 1); /* offset vertically back up and one pixel to right */ 10804a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber ref_offset = blkheight * inpitch; /* for limit */ 10814a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 10824a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber curr_offset += 3; 10834a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 10844a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber if (dy&1) 10854a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 10864a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber dy = (dy >> 1) ? 0 : -inpitch; 10874a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 10884a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber for (j = 0; j < blkwidth; j += 4, in += 4) 10894a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 10904a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r13 = 0; 10914a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber p_ref = in; 10924a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber p_cur -= outpitch; /* compensate for the first offset */ 10934a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber tmp = (uint32)(p_ref + ref_offset); /* limit */ 10944a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber while ((uint32)p_ref < tmp) /* the loop un-rolled */ 10954a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 10964a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 = *((uint32*)(p_ref - (inpitch << 1))); /* load 4 bytes */ 10974a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber p_ref += inpitch; 10984a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r6 = (r0 >> 8) & 0xFF00FF; /* second and fourth byte */ 10994a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 &= 0xFF00FF; 11004a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 11014a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 = *((uint32*)(p_ref + (inpitch << 1))); /* r1, r7, ref[3] */ 11024a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r7 = (r1 >> 8) & 0xFF00FF; 11034a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 &= 0xFF00FF; 11044a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 11054a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 += r1; 11064a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r6 += r7; 11074a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 11084a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r2 = *((uint32*)p_ref); /* r2, r8, ref[1] */ 11094a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r8 = (r2 >> 8) & 0xFF00FF; 11104a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r2 &= 0xFF00FF; 11114a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 11124a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 = *((uint32*)(p_ref - inpitch)); /* r1, r7, ref[0] */ 11134a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r7 = (r1 >> 8) & 0xFF00FF; 11144a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 &= 0xFF00FF; 11154a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 += r2; 11164a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 11174a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r7 += r8; 11184a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 11194a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 += 20 * r1; 11204a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r6 += 20 * r7; 11214a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 += 0x100010; 11224a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r6 += 0x100010; 11234a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 11244a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r2 = *((uint32*)(p_ref - (inpitch << 1))); /* r2, r8, ref[-1] */ 11254a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r8 = (r2 >> 8) & 0xFF00FF; 11264a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r2 &= 0xFF00FF; 11274a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 11284a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 = *((uint32*)(p_ref + inpitch)); /* r1, r7, ref[2] */ 11294a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r7 = (r1 >> 8) & 0xFF00FF; 11304a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 &= 0xFF00FF; 11314a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 += r2; 11324a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 11334a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r7 += r8; 11344a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 11354a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 -= 5 * r1; 11364a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r6 -= 5 * r7; 11374a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 11384a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 >>= 5; 11394a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r6 >>= 5; 11404a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /* clip */ 11414a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r13 |= r6; 11424a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r13 |= r0; 11434a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber //CLIPPACK(r6,result) 11444a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 11454a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 = *((uint32*)(p_ref + dy)); 11464a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r2 = (r1 >> 8) & 0xFF00FF; 11474a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 &= 0xFF00FF; 11484a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 += r1; 11494a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r6 += r2; 11504a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 += 0x10001; 11514a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r6 += 0x10001; 11524a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 = (r0 >> 1) & 0xFF00FF; 11534a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r6 = (r6 >> 1) & 0xFF00FF; 11544a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 11554a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 |= (r6 << 8); /* pack it back */ 11564a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber *((uint32*)(p_cur += outpitch)) = r0; 11574a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 11584a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber p_cur += curr_offset; /* offset to the next pixel */ 11594a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber if (r13 & 0xFF000700) /* this column need clipping */ 11604a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 11614a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber p_cur -= 4; 11624a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber for (i = 0; i < 4; i++) 11634a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 11644a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber p_ref = in + i; 11654a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber p_cur -= outpitch; /* compensate for the first offset */ 11664a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 11674a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber tmp = (uint32)(p_ref + ref_offset); /* limit */ 11684a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber while ((uint32)p_ref < tmp) 11694a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { /* loop un-rolled */ 11704a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 = *(p_ref - (inpitch << 1)); 11714a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 = *(p_ref - inpitch); 11724a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r2 = *p_ref; 11734a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r3 = *(p_ref += inpitch); /* modify pointer before loading */ 11744a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r4 = *(p_ref += inpitch); 11754a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /* first pixel */ 11764a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r5 = *(p_ref += inpitch); 11774a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (r0 + r5); 11784a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 = (r1 + r4); 11794a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result -= (r0 * 5);//result -= r0; result -= (r0<<2); 11804a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 = (r2 + r3); 11814a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result += (r0 * 20);//result += (r0<<4); result += (r0<<2); 11824a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (result + 16) >> 5; 11834a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber CLIP_RESULT(result) 11844a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /* 3/4 pel, no need to clip */ 11854a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (result + p_ref[dy-(inpitch<<1)] + 1); 11864a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (result >> 1); 11874a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber *(p_cur += outpitch) = result; 11884a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /* second pixel */ 11894a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 = *(p_ref += inpitch); 11904a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (r1 + r0); 11914a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 = (r2 + r5); 11924a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result -= (r1 * 5);//result -= r1; result -= (r1<<2); 11934a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 = (r3 + r4); 11944a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result += (r1 * 20);//result += (r1<<4); result += (r1<<2); 11954a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (result + 16) >> 5; 11964a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber CLIP_RESULT(result) 11974a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /* 3/4 pel, no need to clip */ 11984a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (result + p_ref[dy-(inpitch<<1)] + 1); 11994a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (result >> 1); 12004a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber *(p_cur += outpitch) = result; 12014a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /* third pixel */ 12024a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 = *(p_ref += inpitch); 12034a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (r2 + r1); 12044a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r2 = (r3 + r0); 12054a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result -= (r2 * 5);//result -= r2; result -= (r2<<2); 12064a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r2 = (r4 + r5); 12074a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result += (r2 * 20);//result += (r2<<4); result += (r2<<2); 12084a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (result + 16) >> 5; 12094a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber CLIP_RESULT(result) 12104a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /* 3/4 pel, no need to clip */ 12114a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (result + p_ref[dy-(inpitch<<1)] + 1); 12124a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (result >> 1); 12134a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber *(p_cur += outpitch) = result; 12144a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /* fourth pixel */ 12154a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r2 = *(p_ref += inpitch); 12164a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (r3 + r2); 12174a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r3 = (r4 + r1); 12184a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result -= (r3 * 5);//result -= r3; result -= (r3<<2); 12194a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r3 = (r5 + r0); 12204a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result += (r3 * 20);//result += (r3<<4); result += (r3<<2); 12214a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (result + 16) >> 5; 12224a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber CLIP_RESULT(result) 12234a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /* 3/4 pel, no need to clip */ 12244a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (result + p_ref[dy-(inpitch<<1)] + 1); 12254a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (result >> 1); 12264a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber *(p_cur += outpitch) = result; 12274a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber p_ref -= (inpitch << 1); /* move back to center of the filter of the next one */ 12284a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 12294a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber p_cur += (curr_offset - 3); 12304a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 12314a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 12324a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 12334a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 12344a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber else 12354a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 12364a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber for (j = 0; j < blkwidth; j += 4, in += 4) 12374a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 12384a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r13 = 0; 12394a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber p_ref = in; 12404a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber p_cur -= outpitch; /* compensate for the first offset */ 12414a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber tmp = (uint32)(p_ref + ref_offset); /* limit */ 12424a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber while ((uint32)p_ref < tmp) /* the loop un-rolled */ 12434a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 12444a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 = *((uint32*)(p_ref - (inpitch << 1))); /* load 4 bytes */ 12454a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber p_ref += inpitch; 12464a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r6 = (r0 >> 8) & 0xFF00FF; /* second and fourth byte */ 12474a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 &= 0xFF00FF; 12484a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 12494a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 = *((uint32*)(p_ref + (inpitch << 1))); /* r1, r7, ref[3] */ 12504a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r7 = (r1 >> 8) & 0xFF00FF; 12514a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 &= 0xFF00FF; 12524a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 12534a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 += r1; 12544a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r6 += r7; 12554a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 12564a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r2 = *((uint32*)p_ref); /* r2, r8, ref[1] */ 12574a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r8 = (r2 >> 8) & 0xFF00FF; 12584a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r2 &= 0xFF00FF; 12594a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 12604a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 = *((uint32*)(p_ref - inpitch)); /* r1, r7, ref[0] */ 12614a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r7 = (r1 >> 8) & 0xFF00FF; 12624a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 &= 0xFF00FF; 12634a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 += r2; 12644a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 12654a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r7 += r8; 12664a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 12674a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 += 20 * r1; 12684a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r6 += 20 * r7; 12694a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 += 0x100010; 12704a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r6 += 0x100010; 12714a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 12724a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r2 = *((uint32*)(p_ref - (inpitch << 1))); /* r2, r8, ref[-1] */ 12734a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r8 = (r2 >> 8) & 0xFF00FF; 12744a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r2 &= 0xFF00FF; 12754a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 12764a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 = *((uint32*)(p_ref + inpitch)); /* r1, r7, ref[2] */ 12774a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r7 = (r1 >> 8) & 0xFF00FF; 12784a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 &= 0xFF00FF; 12794a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 += r2; 12804a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 12814a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r7 += r8; 12824a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 12834a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 -= 5 * r1; 12844a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r6 -= 5 * r7; 12854a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 12864a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 >>= 5; 12874a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r6 >>= 5; 12884a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /* clip */ 12894a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r13 |= r6; 12904a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r13 |= r0; 12914a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber //CLIPPACK(r6,result) 12924a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 &= 0xFF00FF; 12934a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r6 &= 0xFF00FF; 12944a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 |= (r6 << 8); /* pack it back */ 12954a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber *((uint32*)(p_cur += outpitch)) = r0; 12964a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 12974a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber p_cur += curr_offset; /* offset to the next pixel */ 12984a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber if (r13 & 0xFF000700) /* this column need clipping */ 12994a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 13004a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber p_cur -= 4; 13014a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber for (i = 0; i < 4; i++) 13024a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 13034a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber p_ref = in + i; 13044a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber p_cur -= outpitch; /* compensate for the first offset */ 13054a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber tmp = (uint32)(p_ref + ref_offset); /* limit */ 13064a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber while ((uint32)p_ref < tmp) 13074a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { /* loop un-rolled */ 13084a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 = *(p_ref - (inpitch << 1)); 13094a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 = *(p_ref - inpitch); 13104a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r2 = *p_ref; 13114a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r3 = *(p_ref += inpitch); /* modify pointer before loading */ 13124a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r4 = *(p_ref += inpitch); 13134a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /* first pixel */ 13144a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r5 = *(p_ref += inpitch); 13154a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (r0 + r5); 13164a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 = (r1 + r4); 13174a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result -= (r0 * 5);//result -= r0; result -= (r0<<2); 13184a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 = (r2 + r3); 13194a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result += (r0 * 20);//result += (r0<<4); result += (r0<<2); 13204a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (result + 16) >> 5; 13214a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber CLIP_RESULT(result) 13224a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber *(p_cur += outpitch) = result; 13234a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /* second pixel */ 13244a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 = *(p_ref += inpitch); 13254a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (r1 + r0); 13264a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 = (r2 + r5); 13274a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result -= (r1 * 5);//result -= r1; result -= (r1<<2); 13284a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 = (r3 + r4); 13294a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result += (r1 * 20);//result += (r1<<4); result += (r1<<2); 13304a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (result + 16) >> 5; 13314a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber CLIP_RESULT(result) 13324a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber *(p_cur += outpitch) = result; 13334a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /* third pixel */ 13344a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 = *(p_ref += inpitch); 13354a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (r2 + r1); 13364a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r2 = (r3 + r0); 13374a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result -= (r2 * 5);//result -= r2; result -= (r2<<2); 13384a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r2 = (r4 + r5); 13394a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result += (r2 * 20);//result += (r2<<4); result += (r2<<2); 13404a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (result + 16) >> 5; 13414a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber CLIP_RESULT(result) 13424a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber *(p_cur += outpitch) = result; 13434a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /* fourth pixel */ 13444a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r2 = *(p_ref += inpitch); 13454a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (r3 + r2); 13464a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r3 = (r4 + r1); 13474a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result -= (r3 * 5);//result -= r3; result -= (r3<<2); 13484a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r3 = (r5 + r0); 13494a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result += (r3 * 20);//result += (r3<<4); result += (r3<<2); 13504a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (result + 16) >> 5; 13514a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber CLIP_RESULT(result) 13524a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber *(p_cur += outpitch) = result; 13534a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber p_ref -= (inpitch << 1); /* move back to center of the filter of the next one */ 13544a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 13554a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber p_cur += (curr_offset - 3); 13564a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 13574a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 13584a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 13594a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 13604a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 13614a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber return ; 13624a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber} 13634a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 13644a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Hubervoid VertInterp2MC(uint8 *in, int inpitch, int *out, int outpitch, 13654a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber int blkwidth, int blkheight) 13664a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber{ 13674a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber int *p_cur; 13684a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber uint8 *p_ref; 13694a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber uint32 tmp; 13704a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber int result, curr_offset, ref_offset; 13714a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber int j, r0, r1, r2, r3, r4, r5; 13724a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 13734a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber p_cur = out; 13744a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber curr_offset = 1 - outpitch * (blkheight - 1); /* offset vertically back up and one pixel to right */ 13754a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber ref_offset = blkheight * inpitch; /* for limit */ 13764a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 13774a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber for (j = 0; j < blkwidth; j++) 13784a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 13794a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber p_cur -= outpitch; /* compensate for the first offset */ 13804a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber p_ref = in++; 13814a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 13824a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber tmp = (uint32)(p_ref + ref_offset); /* limit */ 13834a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber while ((uint32)p_ref < tmp) 13844a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { /* loop un-rolled */ 13854a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 = *(p_ref - (inpitch << 1)); 13864a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 = *(p_ref - inpitch); 13874a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r2 = *p_ref; 13884a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r3 = *(p_ref += inpitch); /* modify pointer before loading */ 13894a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r4 = *(p_ref += inpitch); 13904a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /* first pixel */ 13914a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r5 = *(p_ref += inpitch); 13924a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (r0 + r5); 13934a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 = (r1 + r4); 13944a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result -= (r0 * 5);//result -= r0; result -= (r0<<2); 13954a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 = (r2 + r3); 13964a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result += (r0 * 20);//result += (r0<<4); result += (r0<<2); 13974a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber *(p_cur += outpitch) = result; 13984a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /* second pixel */ 13994a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 = *(p_ref += inpitch); 14004a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (r1 + r0); 14014a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 = (r2 + r5); 14024a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result -= (r1 * 5);//result -= r1; result -= (r1<<2); 14034a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 = (r3 + r4); 14044a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result += (r1 * 20);//result += (r1<<4); result += (r1<<2); 14054a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber *(p_cur += outpitch) = result; 14064a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /* third pixel */ 14074a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 = *(p_ref += inpitch); 14084a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (r2 + r1); 14094a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r2 = (r3 + r0); 14104a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result -= (r2 * 5);//result -= r2; result -= (r2<<2); 14114a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r2 = (r4 + r5); 14124a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result += (r2 * 20);//result += (r2<<4); result += (r2<<2); 14134a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber *(p_cur += outpitch) = result; 14144a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /* fourth pixel */ 14154a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r2 = *(p_ref += inpitch); 14164a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (r3 + r2); 14174a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r3 = (r4 + r1); 14184a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result -= (r3 * 5);//result -= r3; result -= (r3<<2); 14194a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r3 = (r5 + r0); 14204a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result += (r3 * 20);//result += (r3<<4); result += (r3<<2); 14214a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber *(p_cur += outpitch) = result; 14224a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber p_ref -= (inpitch << 1); /* move back to center of the filter of the next one */ 14234a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 14244a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber p_cur += curr_offset; 14254a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 14264a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 14274a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber return ; 14284a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber} 14294a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 14304a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Hubervoid VertInterp3MC(int *in, int inpitch, uint8 *out, int outpitch, 14314a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber int blkwidth, int blkheight, int dy) 14324a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber{ 14334a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber uint8 *p_cur; 14344a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber int *p_ref; 14354a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber uint32 tmp; 14364a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber int result, result2, curr_offset, ref_offset; 14374a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber int j, r0, r1, r2, r3, r4, r5; 14384a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 14394a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber p_cur = out; 14404a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber curr_offset = 1 - outpitch * (blkheight - 1); /* offset vertically back up and one pixel to right */ 14414a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber ref_offset = blkheight * inpitch; /* for limit */ 14424a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 14434a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber if (dy&1) 14444a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 14454a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber dy = (dy >> 1) ? -(inpitch << 1) : -(inpitch << 1) - inpitch; 14464a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 14474a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber for (j = 0; j < blkwidth; j++) 14484a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 14494a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber p_cur -= outpitch; /* compensate for the first offset */ 14504a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber p_ref = in++; 14514a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 14524a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber tmp = (uint32)(p_ref + ref_offset); /* limit */ 14534a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber while ((uint32)p_ref < tmp) 14544a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { /* loop un-rolled */ 14554a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 = *(p_ref - (inpitch << 1)); 14564a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 = *(p_ref - inpitch); 14574a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r2 = *p_ref; 14584a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r3 = *(p_ref += inpitch); /* modify pointer before loading */ 14594a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r4 = *(p_ref += inpitch); 14604a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /* first pixel */ 14614a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r5 = *(p_ref += inpitch); 14624a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (r0 + r5); 14634a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 = (r1 + r4); 14644a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result -= (r0 * 5);//result -= r0; result -= (r0<<2); 14654a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 = (r2 + r3); 14664a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result += (r0 * 20);//result += (r0<<4); result += (r0<<2); 14674a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (result + 512) >> 10; 14684a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber CLIP_RESULT(result) 14694a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result2 = ((p_ref[dy] + 16) >> 5); 14704a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber CLIP_RESULT(result2) 14714a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /* 3/4 pel, no need to clip */ 14724a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (result + result2 + 1); 14734a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (result >> 1); 14744a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber *(p_cur += outpitch) = result; 14754a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /* second pixel */ 14764a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 = *(p_ref += inpitch); 14774a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (r1 + r0); 14784a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 = (r2 + r5); 14794a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result -= (r1 * 5);//result -= r1; result -= (r1<<2); 14804a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 = (r3 + r4); 14814a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result += (r1 * 20);//result += (r1<<4); result += (r1<<2); 14824a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (result + 512) >> 10; 14834a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber CLIP_RESULT(result) 14844a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result2 = ((p_ref[dy] + 16) >> 5); 14854a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber CLIP_RESULT(result2) 14864a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /* 3/4 pel, no need to clip */ 14874a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (result + result2 + 1); 14884a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (result >> 1); 14894a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber *(p_cur += outpitch) = result; 14904a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /* third pixel */ 14914a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 = *(p_ref += inpitch); 14924a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (r2 + r1); 14934a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r2 = (r3 + r0); 14944a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result -= (r2 * 5);//result -= r2; result -= (r2<<2); 14954a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r2 = (r4 + r5); 14964a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result += (r2 * 20);//result += (r2<<4); result += (r2<<2); 14974a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (result + 512) >> 10; 14984a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber CLIP_RESULT(result) 14994a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result2 = ((p_ref[dy] + 16) >> 5); 15004a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber CLIP_RESULT(result2) 15014a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /* 3/4 pel, no need to clip */ 15024a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (result + result2 + 1); 15034a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (result >> 1); 15044a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber *(p_cur += outpitch) = result; 15054a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /* fourth pixel */ 15064a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r2 = *(p_ref += inpitch); 15074a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (r3 + r2); 15084a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r3 = (r4 + r1); 15094a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result -= (r3 * 5);//result -= r3; result -= (r3<<2); 15104a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r3 = (r5 + r0); 15114a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result += (r3 * 20);//result += (r3<<4); result += (r3<<2); 15124a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (result + 512) >> 10; 15134a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber CLIP_RESULT(result) 15144a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result2 = ((p_ref[dy] + 16) >> 5); 15154a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber CLIP_RESULT(result2) 15164a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /* 3/4 pel, no need to clip */ 15174a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (result + result2 + 1); 15184a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (result >> 1); 15194a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber *(p_cur += outpitch) = result; 15204a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber p_ref -= (inpitch << 1); /* move back to center of the filter of the next one */ 15214a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 15224a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber p_cur += curr_offset; 15234a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 15244a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 15254a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber else 15264a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 15274a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber for (j = 0; j < blkwidth; j++) 15284a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 15294a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber p_cur -= outpitch; /* compensate for the first offset */ 15304a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber p_ref = in++; 15314a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 15324a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber tmp = (uint32)(p_ref + ref_offset); /* limit */ 15334a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber while ((uint32)p_ref < tmp) 15344a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { /* loop un-rolled */ 15354a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 = *(p_ref - (inpitch << 1)); 15364a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 = *(p_ref - inpitch); 15374a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r2 = *p_ref; 15384a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r3 = *(p_ref += inpitch); /* modify pointer before loading */ 15394a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r4 = *(p_ref += inpitch); 15404a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /* first pixel */ 15414a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r5 = *(p_ref += inpitch); 15424a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (r0 + r5); 15434a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 = (r1 + r4); 15444a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result -= (r0 * 5);//result -= r0; result -= (r0<<2); 15454a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 = (r2 + r3); 15464a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result += (r0 * 20);//result += (r0<<4); result += (r0<<2); 15474a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (result + 512) >> 10; 15484a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber CLIP_RESULT(result) 15494a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber *(p_cur += outpitch) = result; 15504a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /* second pixel */ 15514a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 = *(p_ref += inpitch); 15524a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (r1 + r0); 15534a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 = (r2 + r5); 15544a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result -= (r1 * 5);//result -= r1; result -= (r1<<2); 15554a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 = (r3 + r4); 15564a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result += (r1 * 20);//result += (r1<<4); result += (r1<<2); 15574a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (result + 512) >> 10; 15584a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber CLIP_RESULT(result) 15594a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber *(p_cur += outpitch) = result; 15604a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /* third pixel */ 15614a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 = *(p_ref += inpitch); 15624a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (r2 + r1); 15634a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r2 = (r3 + r0); 15644a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result -= (r2 * 5);//result -= r2; result -= (r2<<2); 15654a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r2 = (r4 + r5); 15664a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result += (r2 * 20);//result += (r2<<4); result += (r2<<2); 15674a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (result + 512) >> 10; 15684a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber CLIP_RESULT(result) 15694a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber *(p_cur += outpitch) = result; 15704a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /* fourth pixel */ 15714a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r2 = *(p_ref += inpitch); 15724a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (r3 + r2); 15734a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r3 = (r4 + r1); 15744a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result -= (r3 * 5);//result -= r3; result -= (r3<<2); 15754a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r3 = (r5 + r0); 15764a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result += (r3 * 20);//result += (r3<<4); result += (r3<<2); 15774a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (result + 512) >> 10; 15784a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber CLIP_RESULT(result) 15794a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber *(p_cur += outpitch) = result; 15804a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber p_ref -= (inpitch << 1); /* move back to center of the filter of the next one */ 15814a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 15824a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber p_cur += curr_offset; 15834a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 15844a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 15854a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 15864a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber return ; 15874a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber} 15884a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 15894a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Hubervoid DiagonalInterpMC(uint8 *in1, uint8 *in2, int inpitch, 15904a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber uint8 *out, int outpitch, 15914a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber int blkwidth, int blkheight) 15924a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber{ 15934a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber int j, i; 15944a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber int result; 15954a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber uint8 *p_cur, *p_ref, *p_tmp8; 15964a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber int curr_offset, ref_offset; 15974a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber uint8 tmp_res[24][24], tmp_in[24][24]; 15984a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber uint32 *p_tmp; 15994a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber uint32 tmp, pkres, tmp_result; 16004a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber int32 r0, r1, r2, r3, r4, r5; 16014a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber int32 r6, r7, r8, r9, r10, r13; 16024a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 16034a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber ref_offset = inpitch - blkwidth; 16044a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber p_ref = in1 - 2; 16054a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /* perform horizontal interpolation */ 16064a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /* not word-aligned */ 16074a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /* It is faster to read 1 byte at time to avoid calling CreateAlign */ 16084a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /* if(((uint32)p_ref)&0x3) 16094a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 16104a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber CreateAlign(p_ref,inpitch,0,&tmp_in[0][0],blkwidth+8,blkheight); 16114a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber p_ref = &tmp_in[0][0]; 16124a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber ref_offset = 24-blkwidth; 16134a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber }*/ 16144a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 16154a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber p_tmp = (uint32*) & (tmp_res[0][0]); 16164a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber for (j = blkheight; j > 0; j--) 16174a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 16184a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r13 = 0; 16194a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber tmp = (uint32)(p_ref + blkwidth); 16204a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 16214a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber //r0 = *((uint32*)p_ref); /* d,c,b,a */ 16224a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber //r1 = (r0>>8)&0xFF00FF; /* 0,d,0,b */ 16234a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber //r0 &= 0xFF00FF; /* 0,c,0,a */ 16244a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /* It is faster to read 1 byte at a time, */ 16254a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 = p_ref[0]; 16264a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 = p_ref[2]; 16274a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 |= (r1 << 16); /* 0,c,0,a */ 16284a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 = p_ref[1]; 16294a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r2 = p_ref[3]; 16304a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 |= (r2 << 16); /* 0,d,0,b */ 16314a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 16324a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber while ((uint32)p_ref < tmp) 16334a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 16344a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber //r2 = *((uint32*)(p_ref+=4));/* h,g,f,e */ 16354a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber //r3 = (r2>>8)&0xFF00FF; /* 0,h,0,f */ 16364a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber //r2 &= 0xFF00FF; /* 0,g,0,e */ 16374a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /* It is faster to read 1 byte at a time, */ 16384a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r2 = *(p_ref += 4); 16394a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r3 = p_ref[2]; 16404a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r2 |= (r3 << 16); /* 0,g,0,e */ 16414a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r3 = p_ref[1]; 16424a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r4 = p_ref[3]; 16434a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r3 |= (r4 << 16); /* 0,h,0,f */ 16444a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 16454a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r4 = r0 + r3; /* c+h, a+f */ 16464a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r5 = r0 + r1; /* c+d, a+b */ 16474a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r6 = r2 + r3; /* g+h, e+f */ 16484a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r5 >>= 16; 16494a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r5 |= (r6 << 16); /* e+f, c+d */ 16504a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r4 += r5 * 20; /* c+20*e+20*f+h, a+20*c+20*d+f */ 16514a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r4 += 0x100010; /* +16, +16 */ 16524a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r5 = r1 + r2; /* d+g, b+e */ 16534a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r4 -= r5 * 5; /* c-5*d+20*e+20*f-5*g+h, a-5*b+20*c+20*d-5*e+f */ 16544a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r4 >>= 5; 16554a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r13 |= r4; /* check clipping */ 16564a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r4 &= 0xFF00FF; /* mask */ 16574a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 16584a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r5 = p_ref[4]; /* i */ 16594a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r6 = (r5 << 16); 16604a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r5 = r6 | (r2 >> 16);/* 0,i,0,g */ 16614a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r5 += r1; /* d+i, b+g */ /* r5 not free */ 16624a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 >>= 16; 16634a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 |= (r3 << 16); /* 0,f,0,d */ /* r1 has changed */ 16644a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 += r2; /* f+g, d+e */ 16654a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r5 += 20 * r1; /* d+20f+20g+i, b+20d+20e+g */ 16664a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 >>= 16; 16674a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 |= (r2 << 16); /* 0,e,0,c */ /* r0 has changed */ 16684a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 += r3; /* e+h, c+f */ 16694a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r5 += 0x100010; /* 16,16 */ 16704a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r5 -= r0 * 5; /* d-5e+20f+20g-5h+i, b-5c+20d+20e-5f+g */ 16714a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r5 >>= 5; 16724a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r13 |= r5; /* check clipping */ 16734a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r5 &= 0xFF00FF; /* mask */ 16744a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 16754a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r4 |= (r5 << 8); /* pack them together */ 16764a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber *p_tmp++ = r4; 16774a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 = r3; 16784a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 = r2; 16794a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 16804a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber p_tmp += ((24 - blkwidth) >> 2); /* move to the next line */ 16814a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber p_ref += ref_offset; /* ref_offset = inpitch-blkwidth; */ 16824a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 16834a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber if (r13&0xFF000700) /* need clipping */ 16844a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 16854a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /* move back to the beginning of the line */ 16864a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber p_ref -= (ref_offset + blkwidth); /* input */ 16874a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber p_tmp -= 6; /* intermediate output */ 16884a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber tmp = (uint32)(p_ref + blkwidth); 16894a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber while ((uint32)p_ref < tmp) 16904a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 16914a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 = *p_ref++; 16924a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 = *p_ref++; 16934a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r2 = *p_ref++; 16944a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r3 = *p_ref++; 16954a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r4 = *p_ref++; 16964a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /* first pixel */ 16974a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r5 = *p_ref++; 16984a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (r0 + r5); 16994a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 = (r1 + r4); 17004a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result -= (r0 * 5);//result -= r0; result -= (r0<<2); 17014a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 = (r2 + r3); 17024a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result += (r0 * 20);//result += (r0<<4); result += (r0<<2); 17034a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (result + 16) >> 5; 17044a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber CLIP_RESULT(result) 17054a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber pkres = result; 17064a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /* second pixel */ 17074a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 = *p_ref++; 17084a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (r1 + r0); 17094a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 = (r2 + r5); 17104a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result -= (r1 * 5);//result -= r1; result -= (r1<<2); 17114a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 = (r3 + r4); 17124a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result += (r1 * 20);//result += (r1<<4); result += (r1<<2); 17134a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (result + 16) >> 5; 17144a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber CLIP_RESULT(result) 17154a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber pkres |= (result << 8); 17164a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /* third pixel */ 17174a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 = *p_ref++; 17184a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (r2 + r1); 17194a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r2 = (r3 + r0); 17204a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result -= (r2 * 5);//result -= r2; result -= (r2<<2); 17214a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r2 = (r4 + r5); 17224a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result += (r2 * 20);//result += (r2<<4); result += (r2<<2); 17234a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (result + 16) >> 5; 17244a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber CLIP_RESULT(result) 17254a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber pkres |= (result << 16); 17264a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /* fourth pixel */ 17274a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r2 = *p_ref++; 17284a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (r3 + r2); 17294a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r3 = (r4 + r1); 17304a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result -= (r3 * 5);//result -= r3; result -= (r3<<2); 17314a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r3 = (r5 + r0); 17324a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result += (r3 * 20);//result += (r3<<4); result += (r3<<2); 17334a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (result + 16) >> 5; 17344a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber CLIP_RESULT(result) 17354a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber pkres |= (result << 24); 17364a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 17374a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber *p_tmp++ = pkres; /* write 4 pixel */ 17384a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber p_ref -= 5; 17394a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 17404a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber p_tmp += ((24 - blkwidth) >> 2); /* move to the next line */ 17414a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber p_ref += ref_offset; /* ref_offset = inpitch-blkwidth; */ 17424a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 17434a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 17444a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 17454a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /* perform vertical interpolation */ 17464a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /* not word-aligned */ 17474a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber if (((uint32)in2)&0x3) 17484a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 17494a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber CreateAlign(in2, inpitch, -2, &tmp_in[0][0], blkwidth, blkheight + 5); 17504a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber in2 = &tmp_in[2][0]; 17514a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber inpitch = 24; 17524a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 17534a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 17544a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber p_cur = out; 17554a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber curr_offset = 1 - outpitch * (blkheight - 1); /* offset vertically up and one pixel right */ 17564a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber pkres = blkheight * inpitch; /* reuse it for limit */ 17574a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 17584a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber curr_offset += 3; 17594a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 17604a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber for (j = 0; j < blkwidth; j += 4, in2 += 4) 17614a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 17624a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r13 = 0; 17634a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber p_ref = in2; 17644a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber p_tmp8 = &(tmp_res[0][j]); /* intermediate result */ 17654a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber p_tmp8 -= 24; /* compensate for the first offset */ 17664a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber p_cur -= outpitch; /* compensate for the first offset */ 17674a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber tmp = (uint32)(p_ref + pkres); /* limit */ 17684a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber while ((uint32)p_ref < tmp) /* the loop un-rolled */ 17694a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 17704a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /* Read 1 byte at a time is too slow, too many read and pack ops, need to call CreateAlign, */ 17714a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /*p_ref8 = p_ref-(inpitch<<1); r0 = p_ref8[0]; r1 = p_ref8[2]; 17724a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 |= (r1<<16); r6 = p_ref8[1]; r1 = p_ref8[3]; 17734a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r6 |= (r1<<16); p_ref+=inpitch; */ 17744a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 = *((uint32*)(p_ref - (inpitch << 1))); /* load 4 bytes */ 17754a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber p_ref += inpitch; 17764a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r6 = (r0 >> 8) & 0xFF00FF; /* second and fourth byte */ 17774a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 &= 0xFF00FF; 17784a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 17794a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /*p_ref8 = p_ref+(inpitch<<1); 17804a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 = p_ref8[0]; r7 = p_ref8[2]; r1 |= (r7<<16); 17814a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r7 = p_ref8[1]; r2 = p_ref8[3]; r7 |= (r2<<16);*/ 17824a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 = *((uint32*)(p_ref + (inpitch << 1))); /* r1, r7, ref[3] */ 17834a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r7 = (r1 >> 8) & 0xFF00FF; 17844a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 &= 0xFF00FF; 17854a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 17864a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 += r1; 17874a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r6 += r7; 17884a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 17894a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /*r2 = p_ref[0]; r8 = p_ref[2]; r2 |= (r8<<16); 17904a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r8 = p_ref[1]; r1 = p_ref[3]; r8 |= (r1<<16);*/ 17914a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r2 = *((uint32*)p_ref); /* r2, r8, ref[1] */ 17924a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r8 = (r2 >> 8) & 0xFF00FF; 17934a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r2 &= 0xFF00FF; 17944a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 17954a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /*p_ref8 = p_ref-inpitch; r1 = p_ref8[0]; r7 = p_ref8[2]; 17964a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 |= (r7<<16); r1 += r2; r7 = p_ref8[1]; 17974a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r2 = p_ref8[3]; r7 |= (r2<<16);*/ 17984a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 = *((uint32*)(p_ref - inpitch)); /* r1, r7, ref[0] */ 17994a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r7 = (r1 >> 8) & 0xFF00FF; 18004a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 &= 0xFF00FF; 18014a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 += r2; 18024a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 18034a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r7 += r8; 18044a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 18054a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 += 20 * r1; 18064a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r6 += 20 * r7; 18074a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 += 0x100010; 18084a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r6 += 0x100010; 18094a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 18104a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /*p_ref8 = p_ref-(inpitch<<1); r2 = p_ref8[0]; r8 = p_ref8[2]; 18114a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r2 |= (r8<<16); r8 = p_ref8[1]; r1 = p_ref8[3]; r8 |= (r1<<16);*/ 18124a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r2 = *((uint32*)(p_ref - (inpitch << 1))); /* r2, r8, ref[-1] */ 18134a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r8 = (r2 >> 8) & 0xFF00FF; 18144a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r2 &= 0xFF00FF; 18154a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 18164a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /*p_ref8 = p_ref+inpitch; r1 = p_ref8[0]; r7 = p_ref8[2]; 18174a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 |= (r7<<16); r1 += r2; r7 = p_ref8[1]; 18184a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r2 = p_ref8[3]; r7 |= (r2<<16);*/ 18194a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 = *((uint32*)(p_ref + inpitch)); /* r1, r7, ref[2] */ 18204a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r7 = (r1 >> 8) & 0xFF00FF; 18214a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 &= 0xFF00FF; 18224a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 += r2; 18234a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 18244a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r7 += r8; 18254a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 18264a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 -= 5 * r1; 18274a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r6 -= 5 * r7; 18284a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 18294a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 >>= 5; 18304a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r6 >>= 5; 18314a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /* clip */ 18324a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r13 |= r6; 18334a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r13 |= r0; 18344a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber //CLIPPACK(r6,result) 18354a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /* add with horizontal results */ 18364a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r10 = *((uint32*)(p_tmp8 += 24)); 18374a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r9 = (r10 >> 8) & 0xFF00FF; 18384a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r10 &= 0xFF00FF; 18394a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 18404a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 += r10; 18414a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 += 0x10001; 18424a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 = (r0 >> 1) & 0xFF00FF; /* mask to 8 bytes */ 18434a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 18444a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r6 += r9; 18454a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r6 += 0x10001; 18464a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r6 = (r6 >> 1) & 0xFF00FF; /* mask to 8 bytes */ 18474a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 18484a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 |= (r6 << 8); /* pack it back */ 18494a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber *((uint32*)(p_cur += outpitch)) = r0; 18504a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 18514a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber p_cur += curr_offset; /* offset to the next pixel */ 18524a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber if (r13 & 0xFF000700) /* this column need clipping */ 18534a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 18544a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber p_cur -= 4; 18554a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber for (i = 0; i < 4; i++) 18564a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 18574a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber p_ref = in2 + i; 18584a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber p_tmp8 = &(tmp_res[0][j+i]); /* intermediate result */ 18594a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber p_tmp8 -= 24; /* compensate for the first offset */ 18604a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber p_cur -= outpitch; /* compensate for the first offset */ 18614a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber tmp = (uint32)(p_ref + pkres); /* limit */ 18624a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber while ((uint32)p_ref < tmp) /* the loop un-rolled */ 18634a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 18644a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 = *(p_ref - (inpitch << 1)); 18654a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 = *(p_ref - inpitch); 18664a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r2 = *p_ref; 18674a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r3 = *(p_ref += inpitch); /* modify pointer before loading */ 18684a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r4 = *(p_ref += inpitch); 18694a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /* first pixel */ 18704a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r5 = *(p_ref += inpitch); 18714a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (r0 + r5); 18724a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 = (r1 + r4); 18734a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result -= (r0 * 5);//result -= r0; result -= (r0<<2); 18744a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 = (r2 + r3); 18754a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result += (r0 * 20);//result += (r0<<4); result += (r0<<2); 18764a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (result + 16) >> 5; 18774a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber CLIP_RESULT(result) 18784a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber tmp_result = *(p_tmp8 += 24); /* modify pointer before loading */ 18794a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (result + tmp_result + 1); /* no clip */ 18804a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (result >> 1); 18814a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber *(p_cur += outpitch) = result; 18824a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /* second pixel */ 18834a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 = *(p_ref += inpitch); 18844a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (r1 + r0); 18854a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 = (r2 + r5); 18864a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result -= (r1 * 5);//result -= r1; result -= (r1<<2); 18874a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 = (r3 + r4); 18884a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result += (r1 * 20);//result += (r1<<4); result += (r1<<2); 18894a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (result + 16) >> 5; 18904a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber CLIP_RESULT(result) 18914a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber tmp_result = *(p_tmp8 += 24); /* intermediate result */ 18924a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (result + tmp_result + 1); /* no clip */ 18934a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (result >> 1); 18944a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber *(p_cur += outpitch) = result; 18954a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /* third pixel */ 18964a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 = *(p_ref += inpitch); 18974a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (r2 + r1); 18984a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r2 = (r3 + r0); 18994a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result -= (r2 * 5);//result -= r2; result -= (r2<<2); 19004a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r2 = (r4 + r5); 19014a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result += (r2 * 20);//result += (r2<<4); result += (r2<<2); 19024a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (result + 16) >> 5; 19034a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber CLIP_RESULT(result) 19044a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber tmp_result = *(p_tmp8 += 24); /* intermediate result */ 19054a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (result + tmp_result + 1); /* no clip */ 19064a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (result >> 1); 19074a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber *(p_cur += outpitch) = result; 19084a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /* fourth pixel */ 19094a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r2 = *(p_ref += inpitch); 19104a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (r3 + r2); 19114a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r3 = (r4 + r1); 19124a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result -= (r3 * 5);//result -= r3; result -= (r3<<2); 19134a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r3 = (r5 + r0); 19144a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result += (r3 * 20);//result += (r3<<4); result += (r3<<2); 19154a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (result + 16) >> 5; 19164a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber CLIP_RESULT(result) 19174a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber tmp_result = *(p_tmp8 += 24); /* intermediate result */ 19184a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (result + tmp_result + 1); /* no clip */ 19194a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = (result >> 1); 19204a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber *(p_cur += outpitch) = result; 19214a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber p_ref -= (inpitch << 1); /* move back to center of the filter of the next one */ 19224a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 19234a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber p_cur += (curr_offset - 3); 19244a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 19254a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 19264a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 19274a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 19284a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber return ; 19294a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber} 19304a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 19314a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber/* position G */ 19324a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Hubervoid FullPelMC(uint8 *in, int inpitch, uint8 *out, int outpitch, 19334a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber int blkwidth, int blkheight) 19344a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber{ 19354a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber int i, j; 19364a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber int offset_in = inpitch - blkwidth; 19374a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber int offset_out = outpitch - blkwidth; 19384a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber uint32 temp; 19394a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber uint8 byte; 19404a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 19414a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber if (((uint32)in)&3) 19424a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 19434a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber for (j = blkheight; j > 0; j--) 19444a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 19454a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber for (i = blkwidth; i > 0; i -= 4) 19464a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 19474a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber temp = *in++; 19484a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber byte = *in++; 19494a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber temp |= (byte << 8); 19504a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber byte = *in++; 19514a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber temp |= (byte << 16); 19524a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber byte = *in++; 19534a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber temp |= (byte << 24); 19544a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 19554a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber *((uint32*)out) = temp; /* write 4 bytes */ 19564a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber out += 4; 19574a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 19584a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber out += offset_out; 19594a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber in += offset_in; 19604a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 19614a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 19624a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber else 19634a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 19644a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber for (j = blkheight; j > 0; j--) 19654a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 19664a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber for (i = blkwidth; i > 0; i -= 4) 19674a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 19684a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber temp = *((uint32*)in); 19694a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber *((uint32*)out) = temp; 19704a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber in += 4; 19714a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber out += 4; 19724a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 19734a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber out += offset_out; 19744a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber in += offset_in; 19754a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 19764a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 19774a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber return ; 19784a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber} 19794a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 19804a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Hubervoid ChromaMotionComp(uint8 *ref, int picwidth, int picheight, 19814a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber int x_pos, int y_pos, 19824a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber uint8 *pred, int pred_pitch, 19834a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber int blkwidth, int blkheight) 19844a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber{ 19854a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber int dx, dy; 19864a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber int offset_dx, offset_dy; 19874a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber int index; 19884a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber uint8 temp[24][24]; 19894a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 19904a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber dx = x_pos & 7; 19914a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber dy = y_pos & 7; 19924a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber offset_dx = (dx + 7) >> 3; 19934a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber offset_dy = (dy + 7) >> 3; 19944a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber x_pos = x_pos >> 3; /* round it to full-pel resolution */ 19954a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber y_pos = y_pos >> 3; 19964a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 19974a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber if ((x_pos >= 0 && x_pos + blkwidth + offset_dx <= picwidth) && (y_pos >= 0 && y_pos + blkheight + offset_dy <= picheight)) 19984a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 19994a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber ref += y_pos * picwidth + x_pos; 20004a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 20014a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber else 20024a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 20034a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber CreatePad(ref, picwidth, picheight, x_pos, y_pos, &temp[0][0], blkwidth + offset_dx, blkheight + offset_dy); 20044a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber ref = &temp[0][0]; 20054a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber picwidth = 24; 20064a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 20074a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 20084a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber index = offset_dx + (offset_dy << 1) + ((blkwidth << 1) & 0x7); 20094a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 20104a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber (*(ChromaMC_SIMD[index]))(ref, picwidth , dx, dy, pred, pred_pitch, blkwidth, blkheight); 20114a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber return ; 20124a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber} 20134a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 20144a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 20154a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber/* SIMD routines, unroll the loops in vertical direction, decreasing loops (things to be done) */ 20164a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Hubervoid ChromaDiagonalMC_SIMD(uint8 *pRef, int srcPitch, int dx, int dy, 20174a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber uint8 *pOut, int predPitch, int blkwidth, int blkheight) 20184a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber{ 20194a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber int32 r0, r1, r2, r3, result0, result1; 20204a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber uint8 temp[288]; 20214a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber uint8 *ref, *out; 20224a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber int i, j; 20234a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber int dx_8 = 8 - dx; 20244a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber int dy_8 = 8 - dy; 20254a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 20264a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /* horizontal first */ 20274a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber out = temp; 20284a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber for (i = 0; i < blkheight + 1; i++) 20294a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 20304a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber ref = pRef; 20314a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 = ref[0]; 20324a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber for (j = 0; j < blkwidth; j += 4) 20334a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 20344a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 |= (ref[2] << 16); 20354a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result0 = dx_8 * r0; 20364a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 20374a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 = ref[1] | (ref[3] << 16); 20384a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result0 += dx * r1; 20394a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber *(int32 *)out = result0; 20404a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 20414a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result0 = dx_8 * r1; 20424a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 20434a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r2 = ref[4]; 20444a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 = r0 >> 16; 20454a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 = r0 | (r2 << 16); 20464a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result0 += dx * r1; 20474a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber *(int32 *)(out + 16) = result0; 20484a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 20494a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber ref += 4; 20504a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber out += 4; 20514a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 = r2; 20524a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 20534a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber pRef += srcPitch; 20544a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber out += (32 - blkwidth); 20554a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 20564a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 20574a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber// pRef -= srcPitch*(blkheight+1); 20584a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber ref = temp; 20594a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 20604a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber for (j = 0; j < blkwidth; j += 4) 20614a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 20624a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 = *(int32 *)ref; 20634a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 = *(int32 *)(ref + 16); 20644a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber ref += 32; 20654a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber out = pOut; 20664a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber for (i = 0; i < (blkheight >> 1); i++) 20674a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 20684a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result0 = dy_8 * r0 + 0x00200020; 20694a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r2 = *(int32 *)ref; 20704a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result0 += dy * r2; 20714a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result0 >>= 6; 20724a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result0 &= 0x00FF00FF; 20734a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 = r2; 20744a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 20754a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result1 = dy_8 * r1 + 0x00200020; 20764a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r3 = *(int32 *)(ref + 16); 20774a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result1 += dy * r3; 20784a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result1 >>= 6; 20794a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result1 &= 0x00FF00FF; 20804a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 = r3; 20814a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber *(int32 *)out = result0 | (result1 << 8); 20824a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber out += predPitch; 20834a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber ref += 32; 20844a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 20854a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result0 = dy_8 * r0 + 0x00200020; 20864a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r2 = *(int32 *)ref; 20874a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result0 += dy * r2; 20884a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result0 >>= 6; 20894a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result0 &= 0x00FF00FF; 20904a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 = r2; 20914a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 20924a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result1 = dy_8 * r1 + 0x00200020; 20934a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r3 = *(int32 *)(ref + 16); 20944a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result1 += dy * r3; 20954a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result1 >>= 6; 20964a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result1 &= 0x00FF00FF; 20974a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 = r3; 20984a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber *(int32 *)out = result0 | (result1 << 8); 20994a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber out += predPitch; 21004a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber ref += 32; 21014a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 21024a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber pOut += 4; 21034a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber ref = temp + 4; /* since it can only iterate twice max */ 21044a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 21054a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber return; 21064a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber} 21074a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 21084a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Hubervoid ChromaHorizontalMC_SIMD(uint8 *pRef, int srcPitch, int dx, int dy, 21094a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber uint8 *pOut, int predPitch, int blkwidth, int blkheight) 21104a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber{ 21114a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber OSCL_UNUSED_ARG(dy); 21124a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber int32 r0, r1, r2, result0, result1; 21134a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber uint8 *ref, *out; 21144a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber int i, j; 21154a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber int dx_8 = 8 - dx; 21164a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 21174a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /* horizontal first */ 21184a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber for (i = 0; i < blkheight; i++) 21194a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 21204a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber ref = pRef; 21214a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber out = pOut; 21224a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 21234a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 = ref[0]; 21244a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber for (j = 0; j < blkwidth; j += 4) 21254a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 21264a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 |= (ref[2] << 16); 21274a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result0 = dx_8 * r0 + 0x00040004; 21284a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 21294a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 = ref[1] | (ref[3] << 16); 21304a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result0 += dx * r1; 21314a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result0 >>= 3; 21324a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result0 &= 0x00FF00FF; 21334a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 21344a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result1 = dx_8 * r1 + 0x00040004; 21354a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 21364a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r2 = ref[4]; 21374a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 = r0 >> 16; 21384a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 = r0 | (r2 << 16); 21394a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result1 += dx * r1; 21404a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result1 >>= 3; 21414a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result1 &= 0x00FF00FF; 21424a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 21434a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber *(int32 *)out = result0 | (result1 << 8); 21444a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 21454a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber ref += 4; 21464a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber out += 4; 21474a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 = r2; 21484a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 21494a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 21504a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber pRef += srcPitch; 21514a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber pOut += predPitch; 21524a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 21534a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber return; 21544a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber} 21554a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 21564a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Hubervoid ChromaVerticalMC_SIMD(uint8 *pRef, int srcPitch, int dx, int dy, 21574a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber uint8 *pOut, int predPitch, int blkwidth, int blkheight) 21584a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber{ 21594a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber OSCL_UNUSED_ARG(dx); 21604a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber int32 r0, r1, r2, r3, result0, result1; 21614a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber int i, j; 21624a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber uint8 *ref, *out; 21634a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber int dy_8 = 8 - dy; 21644a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /* vertical first */ 21654a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber for (i = 0; i < blkwidth; i += 4) 21664a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 21674a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber ref = pRef; 21684a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber out = pOut; 21694a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 21704a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 = ref[0] | (ref[2] << 16); 21714a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 = ref[1] | (ref[3] << 16); 21724a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber ref += srcPitch; 21734a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber for (j = 0; j < blkheight; j++) 21744a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 21754a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result0 = dy_8 * r0 + 0x00040004; 21764a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r2 = ref[0] | (ref[2] << 16); 21774a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result0 += dy * r2; 21784a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result0 >>= 3; 21794a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result0 &= 0x00FF00FF; 21804a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 = r2; 21814a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 21824a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result1 = dy_8 * r1 + 0x00040004; 21834a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r3 = ref[1] | (ref[3] << 16); 21844a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result1 += dy * r3; 21854a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result1 >>= 3; 21864a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result1 &= 0x00FF00FF; 21874a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 = r3; 21884a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber *(int32 *)out = result0 | (result1 << 8); 21894a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber ref += srcPitch; 21904a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber out += predPitch; 21914a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 21924a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber pOut += 4; 21934a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber pRef += 4; 21944a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 21954a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber return; 21964a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber} 21974a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 21984a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Hubervoid ChromaDiagonalMC2_SIMD(uint8 *pRef, int srcPitch, int dx, int dy, 21994a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber uint8 *pOut, int predPitch, int blkwidth, int blkheight) 22004a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber{ 22014a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber OSCL_UNUSED_ARG(blkwidth); 22024a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber int32 r0, r1, temp0, temp1, result; 22034a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber int32 temp[9]; 22044a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber int32 *out; 22054a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber int i, r_temp; 22064a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber int dy_8 = 8 - dy; 22074a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 22084a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /* horizontal first */ 22094a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber out = temp; 22104a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber for (i = 0; i < blkheight + 1; i++) 22114a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 22124a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r_temp = pRef[1]; 22134a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber temp0 = (pRef[0] << 3) + dx * (r_temp - pRef[0]); 22144a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber temp1 = (r_temp << 3) + dx * (pRef[2] - r_temp); 22154a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 = temp0 | (temp1 << 16); 22164a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber *out++ = r0; 22174a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber pRef += srcPitch; 22184a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 22194a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 22204a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber pRef -= srcPitch * (blkheight + 1); 22214a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 22224a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber out = temp; 22234a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 22244a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 = *out++; 22254a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 22264a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber for (i = 0; i < blkheight; i++) 22274a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 22284a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = dy_8 * r0 + 0x00200020; 22294a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 = *out++; 22304a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result += dy * r1; 22314a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result >>= 6; 22324a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result &= 0x00FF00FF; 22334a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber *(int16 *)pOut = (result >> 8) | (result & 0xFF); 22344a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 = r1; 22354a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber pOut += predPitch; 22364a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 22374a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber return; 22384a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber} 22394a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 22404a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Hubervoid ChromaHorizontalMC2_SIMD(uint8 *pRef, int srcPitch, int dx, int dy, 22414a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber uint8 *pOut, int predPitch, int blkwidth, int blkheight) 22424a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber{ 22434a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber OSCL_UNUSED_ARG(dy); 22444a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber OSCL_UNUSED_ARG(blkwidth); 22454a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber int i, temp, temp0, temp1; 22464a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 22474a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber /* horizontal first */ 22484a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber for (i = 0; i < blkheight; i++) 22494a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 22504a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber temp = pRef[1]; 22514a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber temp0 = ((pRef[0] << 3) + dx * (temp - pRef[0]) + 4) >> 3; 22524a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber temp1 = ((temp << 3) + dx * (pRef[2] - temp) + 4) >> 3; 22534a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 22544a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber *(int16 *)pOut = temp0 | (temp1 << 8); 22554a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber pRef += srcPitch; 22564a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber pOut += predPitch; 22574a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 22584a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 22594a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber return; 22604a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber} 22614a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Hubervoid ChromaVerticalMC2_SIMD(uint8 *pRef, int srcPitch, int dx, int dy, 22624a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber uint8 *pOut, int predPitch, int blkwidth, int blkheight) 22634a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber{ 22644a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber OSCL_UNUSED_ARG(dx); 22654a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber OSCL_UNUSED_ARG(blkwidth); 22664a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber int32 r0, r1, result; 22674a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber int i; 22684a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber int dy_8 = 8 - dy; 22694a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 = pRef[0] | (pRef[1] << 16); 22704a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber pRef += srcPitch; 22714a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber for (i = 0; i < blkheight; i++) 22724a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 22734a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result = dy_8 * r0 + 0x00040004; 22744a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r1 = pRef[0] | (pRef[1] << 16); 22754a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result += dy * r1; 22764a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result >>= 3; 22774a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber result &= 0x00FF00FF; 22784a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber *(int16 *)pOut = (result >> 8) | (result & 0xFF); 22794a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber r0 = r1; 22804a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber pRef += srcPitch; 22814a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber pOut += predPitch; 22824a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 22834a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber return; 22844a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber} 22854a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 22864a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Hubervoid ChromaFullMC_SIMD(uint8 *pRef, int srcPitch, int dx, int dy, 22874a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber uint8 *pOut, int predPitch, int blkwidth, int blkheight) 22884a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber{ 22894a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber OSCL_UNUSED_ARG(dx); 22904a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber OSCL_UNUSED_ARG(dy); 22914a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber int i, j; 22924a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber int offset_in = srcPitch - blkwidth; 22934a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber int offset_out = predPitch - blkwidth; 22944a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber uint16 temp; 22954a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber uint8 byte; 22964a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber 22974a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber if (((uint32)pRef)&1) 22984a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 22994a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber for (j = blkheight; j > 0; j--) 23004a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 23014a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber for (i = blkwidth; i > 0; i -= 2) 23024a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 23034a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber temp = *pRef++; 23044a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber byte = *pRef++; 23054a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber temp |= (byte << 8); 23064a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber *((uint16*)pOut) = temp; /* write 2 bytes */ 23074a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber pOut += 2; 23084a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 23094a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber pOut += offset_out; 23104a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber pRef += offset_in; 23114a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 23124a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 23134a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber else 23144a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 23154a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber for (j = blkheight; j > 0; j--) 23164a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 23174a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber for (i = blkwidth; i > 0; i -= 2) 23184a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber { 23194a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber temp = *((uint16*)pRef); 23204a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber *((uint16*)pOut) = temp; 23214a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber pRef += 2; 23224a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber pOut += 2; 23234a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 23244a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber pOut += offset_out; 23254a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber pRef += offset_in; 23264a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 23274a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber } 23284a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber return ; 23294a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber} 2330