11cc31e629e8132df390ae692873c847d1c2f62c0James Dong/* ------------------------------------------------------------------ 21cc31e629e8132df390ae692873c847d1c2f62c0James Dong * Copyright (C) 1998-2009 PacketVideo 31cc31e629e8132df390ae692873c847d1c2f62c0James Dong * 41cc31e629e8132df390ae692873c847d1c2f62c0James Dong * Licensed under the Apache License, Version 2.0 (the "License"); 51cc31e629e8132df390ae692873c847d1c2f62c0James Dong * you may not use this file except in compliance with the License. 61cc31e629e8132df390ae692873c847d1c2f62c0James Dong * You may obtain a copy of the License at 71cc31e629e8132df390ae692873c847d1c2f62c0James Dong * 81cc31e629e8132df390ae692873c847d1c2f62c0James Dong * http://www.apache.org/licenses/LICENSE-2.0 91cc31e629e8132df390ae692873c847d1c2f62c0James Dong * 101cc31e629e8132df390ae692873c847d1c2f62c0James Dong * Unless required by applicable law or agreed to in writing, software 111cc31e629e8132df390ae692873c847d1c2f62c0James Dong * distributed under the License is distributed on an "AS IS" BASIS, 121cc31e629e8132df390ae692873c847d1c2f62c0James Dong * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 131cc31e629e8132df390ae692873c847d1c2f62c0James Dong * express or implied. 141cc31e629e8132df390ae692873c847d1c2f62c0James Dong * See the License for the specific language governing permissions 151cc31e629e8132df390ae692873c847d1c2f62c0James Dong * and limitations under the License. 
161cc31e629e8132df390ae692873c847d1c2f62c0James Dong * ------------------------------------------------------------------- 171cc31e629e8132df390ae692873c847d1c2f62c0James Dong */ 181cc31e629e8132df390ae692873c847d1c2f62c0James Dong#include "avcenc_lib.h" 191cc31e629e8132df390ae692873c847d1c2f62c0James Dong#include "avcenc_int.h" 201cc31e629e8132df390ae692873c847d1c2f62c0James Dong 211cc31e629e8132df390ae692873c847d1c2f62c0James Dong 221cc31e629e8132df390ae692873c847d1c2f62c0James Dong#define CLIP_RESULT(x) if((uint)x > 0xFF){ \ 231cc31e629e8132df390ae692873c847d1c2f62c0James Dong x = 0xFF & (~(x>>31));} 241cc31e629e8132df390ae692873c847d1c2f62c0James Dong 251cc31e629e8132df390ae692873c847d1c2f62c0James Dong/* (blkwidth << 2) + (dy << 1) + dx */ 261cc31e629e8132df390ae692873c847d1c2f62c0James Dongstatic void (*const eChromaMC_SIMD[8])(uint8 *, int , int , int , uint8 *, int, int , int) = 271cc31e629e8132df390ae692873c847d1c2f62c0James Dong{ 281cc31e629e8132df390ae692873c847d1c2f62c0James Dong &eChromaFullMC_SIMD, 291cc31e629e8132df390ae692873c847d1c2f62c0James Dong &eChromaHorizontalMC_SIMD, 301cc31e629e8132df390ae692873c847d1c2f62c0James Dong &eChromaVerticalMC_SIMD, 311cc31e629e8132df390ae692873c847d1c2f62c0James Dong &eChromaDiagonalMC_SIMD, 321cc31e629e8132df390ae692873c847d1c2f62c0James Dong &eChromaFullMC_SIMD, 331cc31e629e8132df390ae692873c847d1c2f62c0James Dong &eChromaHorizontalMC2_SIMD, 341cc31e629e8132df390ae692873c847d1c2f62c0James Dong &eChromaVerticalMC2_SIMD, 351cc31e629e8132df390ae692873c847d1c2f62c0James Dong &eChromaDiagonalMC2_SIMD 361cc31e629e8132df390ae692873c847d1c2f62c0James Dong}; 371cc31e629e8132df390ae692873c847d1c2f62c0James Dong/* Perform motion prediction and compensation with residue if exist. 
*/ 381cc31e629e8132df390ae692873c847d1c2f62c0James Dongvoid AVCMBMotionComp(AVCEncObject *encvid, AVCCommonObj *video) 391cc31e629e8132df390ae692873c847d1c2f62c0James Dong{ 401cc31e629e8132df390ae692873c847d1c2f62c0James Dong (void)(encvid); 411cc31e629e8132df390ae692873c847d1c2f62c0James Dong 421cc31e629e8132df390ae692873c847d1c2f62c0James Dong AVCMacroblock *currMB = video->currMB; 431cc31e629e8132df390ae692873c847d1c2f62c0James Dong AVCPictureData *currPic = video->currPic; 441cc31e629e8132df390ae692873c847d1c2f62c0James Dong int mbPartIdx, subMbPartIdx; 451cc31e629e8132df390ae692873c847d1c2f62c0James Dong int ref_idx; 461cc31e629e8132df390ae692873c847d1c2f62c0James Dong int offset_MbPart_indx = 0; 471cc31e629e8132df390ae692873c847d1c2f62c0James Dong int16 *mv; 481cc31e629e8132df390ae692873c847d1c2f62c0James Dong uint32 x_pos, y_pos; 491cc31e629e8132df390ae692873c847d1c2f62c0James Dong uint8 *curL, *curCb, *curCr; 501cc31e629e8132df390ae692873c847d1c2f62c0James Dong uint8 *ref_l, *ref_Cb, *ref_Cr; 511cc31e629e8132df390ae692873c847d1c2f62c0James Dong uint8 *predBlock, *predCb, *predCr; 521cc31e629e8132df390ae692873c847d1c2f62c0James Dong int block_x, block_y, offset_x, offset_y, offsetP, offset; 531cc31e629e8132df390ae692873c847d1c2f62c0James Dong int x_position = (video->mb_x << 4); 541cc31e629e8132df390ae692873c847d1c2f62c0James Dong int y_position = (video->mb_y << 4); 551cc31e629e8132df390ae692873c847d1c2f62c0James Dong int MbHeight, MbWidth, mbPartIdx_X, mbPartIdx_Y, offset_indx; 561cc31e629e8132df390ae692873c847d1c2f62c0James Dong int picWidth = currPic->width; 571cc31e629e8132df390ae692873c847d1c2f62c0James Dong int picPitch = currPic->pitch; 581cc31e629e8132df390ae692873c847d1c2f62c0James Dong int picHeight = currPic->height; 591cc31e629e8132df390ae692873c847d1c2f62c0James Dong uint32 tmp_word; 601cc31e629e8132df390ae692873c847d1c2f62c0James Dong 611cc31e629e8132df390ae692873c847d1c2f62c0James Dong tmp_word = y_position * picPitch; 
621cc31e629e8132df390ae692873c847d1c2f62c0James Dong curL = currPic->Sl + tmp_word + x_position; 631cc31e629e8132df390ae692873c847d1c2f62c0James Dong offset = (tmp_word >> 2) + (x_position >> 1); 641cc31e629e8132df390ae692873c847d1c2f62c0James Dong curCb = currPic->Scb + offset; 651cc31e629e8132df390ae692873c847d1c2f62c0James Dong curCr = currPic->Scr + offset; 661cc31e629e8132df390ae692873c847d1c2f62c0James Dong 671cc31e629e8132df390ae692873c847d1c2f62c0James Dong predBlock = curL; 681cc31e629e8132df390ae692873c847d1c2f62c0James Dong predCb = curCb; 691cc31e629e8132df390ae692873c847d1c2f62c0James Dong predCr = curCr; 701cc31e629e8132df390ae692873c847d1c2f62c0James Dong 711cc31e629e8132df390ae692873c847d1c2f62c0James Dong GetMotionVectorPredictor(video, 1); 721cc31e629e8132df390ae692873c847d1c2f62c0James Dong 731cc31e629e8132df390ae692873c847d1c2f62c0James Dong for (mbPartIdx = 0; mbPartIdx < currMB->NumMbPart; mbPartIdx++) 741cc31e629e8132df390ae692873c847d1c2f62c0James Dong { 751cc31e629e8132df390ae692873c847d1c2f62c0James Dong MbHeight = currMB->SubMbPartHeight[mbPartIdx]; 761cc31e629e8132df390ae692873c847d1c2f62c0James Dong MbWidth = currMB->SubMbPartWidth[mbPartIdx]; 771cc31e629e8132df390ae692873c847d1c2f62c0James Dong mbPartIdx_X = ((mbPartIdx + offset_MbPart_indx) & 1); 781cc31e629e8132df390ae692873c847d1c2f62c0James Dong mbPartIdx_Y = (mbPartIdx + offset_MbPart_indx) >> 1; 791cc31e629e8132df390ae692873c847d1c2f62c0James Dong ref_idx = currMB->ref_idx_L0[(mbPartIdx_Y << 1) + mbPartIdx_X]; 801cc31e629e8132df390ae692873c847d1c2f62c0James Dong offset_indx = 0; 811cc31e629e8132df390ae692873c847d1c2f62c0James Dong 821cc31e629e8132df390ae692873c847d1c2f62c0James Dong ref_l = video->RefPicList0[ref_idx]->Sl; 831cc31e629e8132df390ae692873c847d1c2f62c0James Dong ref_Cb = video->RefPicList0[ref_idx]->Scb; 841cc31e629e8132df390ae692873c847d1c2f62c0James Dong ref_Cr = video->RefPicList0[ref_idx]->Scr; 851cc31e629e8132df390ae692873c847d1c2f62c0James Dong 
861cc31e629e8132df390ae692873c847d1c2f62c0James Dong for (subMbPartIdx = 0; subMbPartIdx < currMB->NumSubMbPart[mbPartIdx]; subMbPartIdx++) 871cc31e629e8132df390ae692873c847d1c2f62c0James Dong { 881cc31e629e8132df390ae692873c847d1c2f62c0James Dong block_x = (mbPartIdx_X << 1) + ((subMbPartIdx + offset_indx) & 1); 891cc31e629e8132df390ae692873c847d1c2f62c0James Dong block_y = (mbPartIdx_Y << 1) + (((subMbPartIdx + offset_indx) >> 1) & 1); 901cc31e629e8132df390ae692873c847d1c2f62c0James Dong mv = (int16*)(currMB->mvL0 + block_x + (block_y << 2)); 911cc31e629e8132df390ae692873c847d1c2f62c0James Dong offset_x = x_position + (block_x << 2); 921cc31e629e8132df390ae692873c847d1c2f62c0James Dong offset_y = y_position + (block_y << 2); 931cc31e629e8132df390ae692873c847d1c2f62c0James Dong x_pos = (offset_x << 2) + *mv++; /*quarter pel */ 941cc31e629e8132df390ae692873c847d1c2f62c0James Dong y_pos = (offset_y << 2) + *mv; /*quarter pel */ 951cc31e629e8132df390ae692873c847d1c2f62c0James Dong 961cc31e629e8132df390ae692873c847d1c2f62c0James Dong //offset = offset_y * currPic->width; 971cc31e629e8132df390ae692873c847d1c2f62c0James Dong //offsetC = (offset >> 2) + (offset_x >> 1); 981cc31e629e8132df390ae692873c847d1c2f62c0James Dong offsetP = (block_y << 2) * picPitch + (block_x << 2); 991cc31e629e8132df390ae692873c847d1c2f62c0James Dong eLumaMotionComp(ref_l, picPitch, picHeight, x_pos, y_pos, 1001cc31e629e8132df390ae692873c847d1c2f62c0James Dong /*comp_Sl + offset + offset_x,*/ 1011cc31e629e8132df390ae692873c847d1c2f62c0James Dong predBlock + offsetP, picPitch, MbWidth, MbHeight); 1021cc31e629e8132df390ae692873c847d1c2f62c0James Dong 1031cc31e629e8132df390ae692873c847d1c2f62c0James Dong offsetP = (block_y * picWidth) + (block_x << 1); 1041cc31e629e8132df390ae692873c847d1c2f62c0James Dong eChromaMotionComp(ref_Cb, picWidth >> 1, picHeight >> 1, x_pos, y_pos, 1051cc31e629e8132df390ae692873c847d1c2f62c0James Dong /*comp_Scb + offsetC,*/ 
1061cc31e629e8132df390ae692873c847d1c2f62c0James Dong predCb + offsetP, picPitch >> 1, MbWidth >> 1, MbHeight >> 1); 1071cc31e629e8132df390ae692873c847d1c2f62c0James Dong eChromaMotionComp(ref_Cr, picWidth >> 1, picHeight >> 1, x_pos, y_pos, 1081cc31e629e8132df390ae692873c847d1c2f62c0James Dong /*comp_Scr + offsetC,*/ 1091cc31e629e8132df390ae692873c847d1c2f62c0James Dong predCr + offsetP, picPitch >> 1, MbWidth >> 1, MbHeight >> 1); 1101cc31e629e8132df390ae692873c847d1c2f62c0James Dong 1111cc31e629e8132df390ae692873c847d1c2f62c0James Dong offset_indx = currMB->SubMbPartWidth[mbPartIdx] >> 3; 1121cc31e629e8132df390ae692873c847d1c2f62c0James Dong } 1131cc31e629e8132df390ae692873c847d1c2f62c0James Dong offset_MbPart_indx = currMB->MbPartWidth >> 4; 1141cc31e629e8132df390ae692873c847d1c2f62c0James Dong } 1151cc31e629e8132df390ae692873c847d1c2f62c0James Dong 1161cc31e629e8132df390ae692873c847d1c2f62c0James Dong return ; 1171cc31e629e8132df390ae692873c847d1c2f62c0James Dong} 1181cc31e629e8132df390ae692873c847d1c2f62c0James Dong 1191cc31e629e8132df390ae692873c847d1c2f62c0James Dong 1201cc31e629e8132df390ae692873c847d1c2f62c0James Dong/* preform the actual motion comp here */ 1211cc31e629e8132df390ae692873c847d1c2f62c0James Dongvoid eLumaMotionComp(uint8 *ref, int picpitch, int picheight, 1221cc31e629e8132df390ae692873c847d1c2f62c0James Dong int x_pos, int y_pos, 1231cc31e629e8132df390ae692873c847d1c2f62c0James Dong uint8 *pred, int pred_pitch, 1241cc31e629e8132df390ae692873c847d1c2f62c0James Dong int blkwidth, int blkheight) 1251cc31e629e8132df390ae692873c847d1c2f62c0James Dong{ 1261cc31e629e8132df390ae692873c847d1c2f62c0James Dong (void)(picheight); 1271cc31e629e8132df390ae692873c847d1c2f62c0James Dong 1281cc31e629e8132df390ae692873c847d1c2f62c0James Dong int dx, dy; 1291cc31e629e8132df390ae692873c847d1c2f62c0James Dong int temp2[21][21]; /* for intermediate results */ 1301cc31e629e8132df390ae692873c847d1c2f62c0James Dong uint8 *ref2; 
1311cc31e629e8132df390ae692873c847d1c2f62c0James Dong 1321cc31e629e8132df390ae692873c847d1c2f62c0James Dong dx = x_pos & 3; 1331cc31e629e8132df390ae692873c847d1c2f62c0James Dong dy = y_pos & 3; 1341cc31e629e8132df390ae692873c847d1c2f62c0James Dong x_pos = x_pos >> 2; /* round it to full-pel resolution */ 1351cc31e629e8132df390ae692873c847d1c2f62c0James Dong y_pos = y_pos >> 2; 1361cc31e629e8132df390ae692873c847d1c2f62c0James Dong 1371cc31e629e8132df390ae692873c847d1c2f62c0James Dong /* perform actual motion compensation */ 1381cc31e629e8132df390ae692873c847d1c2f62c0James Dong if (dx == 0 && dy == 0) 1391cc31e629e8132df390ae692873c847d1c2f62c0James Dong { /* fullpel position *//* G */ 1401cc31e629e8132df390ae692873c847d1c2f62c0James Dong 1411cc31e629e8132df390ae692873c847d1c2f62c0James Dong ref += y_pos * picpitch + x_pos; 1421cc31e629e8132df390ae692873c847d1c2f62c0James Dong 1431cc31e629e8132df390ae692873c847d1c2f62c0James Dong eFullPelMC(ref, picpitch, pred, pred_pitch, blkwidth, blkheight); 1441cc31e629e8132df390ae692873c847d1c2f62c0James Dong 1451cc31e629e8132df390ae692873c847d1c2f62c0James Dong } /* other positions */ 1461cc31e629e8132df390ae692873c847d1c2f62c0James Dong else if (dy == 0) 1471cc31e629e8132df390ae692873c847d1c2f62c0James Dong { /* no vertical interpolation *//* a,b,c*/ 1481cc31e629e8132df390ae692873c847d1c2f62c0James Dong 1491cc31e629e8132df390ae692873c847d1c2f62c0James Dong ref += y_pos * picpitch + x_pos; 1501cc31e629e8132df390ae692873c847d1c2f62c0James Dong 1511cc31e629e8132df390ae692873c847d1c2f62c0James Dong eHorzInterp1MC(ref, picpitch, pred, pred_pitch, blkwidth, blkheight, dx); 1521cc31e629e8132df390ae692873c847d1c2f62c0James Dong } 1531cc31e629e8132df390ae692873c847d1c2f62c0James Dong else if (dx == 0) 1541cc31e629e8132df390ae692873c847d1c2f62c0James Dong { /*no horizontal interpolation *//* d,h,n */ 1551cc31e629e8132df390ae692873c847d1c2f62c0James Dong 1561cc31e629e8132df390ae692873c847d1c2f62c0James Dong ref += y_pos * picpitch + 
x_pos; 1571cc31e629e8132df390ae692873c847d1c2f62c0James Dong 1581cc31e629e8132df390ae692873c847d1c2f62c0James Dong eVertInterp1MC(ref, picpitch, pred, pred_pitch, blkwidth, blkheight, dy); 1591cc31e629e8132df390ae692873c847d1c2f62c0James Dong } 1601cc31e629e8132df390ae692873c847d1c2f62c0James Dong else if (dy == 2) 1611cc31e629e8132df390ae692873c847d1c2f62c0James Dong { /* horizontal cross *//* i, j, k */ 1621cc31e629e8132df390ae692873c847d1c2f62c0James Dong 1631cc31e629e8132df390ae692873c847d1c2f62c0James Dong ref += y_pos * picpitch + x_pos - 2; /* move to the left 2 pixels */ 1641cc31e629e8132df390ae692873c847d1c2f62c0James Dong 1651cc31e629e8132df390ae692873c847d1c2f62c0James Dong eVertInterp2MC(ref, picpitch, &temp2[0][0], 21, blkwidth + 5, blkheight); 1661cc31e629e8132df390ae692873c847d1c2f62c0James Dong 1671cc31e629e8132df390ae692873c847d1c2f62c0James Dong eHorzInterp2MC(&temp2[0][2], 21, pred, pred_pitch, blkwidth, blkheight, dx); 1681cc31e629e8132df390ae692873c847d1c2f62c0James Dong } 1691cc31e629e8132df390ae692873c847d1c2f62c0James Dong else if (dx == 2) 1701cc31e629e8132df390ae692873c847d1c2f62c0James Dong { /* vertical cross */ /* f,q */ 1711cc31e629e8132df390ae692873c847d1c2f62c0James Dong 1721cc31e629e8132df390ae692873c847d1c2f62c0James Dong ref += (y_pos - 2) * picpitch + x_pos; /* move to up 2 lines */ 1731cc31e629e8132df390ae692873c847d1c2f62c0James Dong 1741cc31e629e8132df390ae692873c847d1c2f62c0James Dong eHorzInterp3MC(ref, picpitch, &temp2[0][0], 21, blkwidth, blkheight + 5); 1751cc31e629e8132df390ae692873c847d1c2f62c0James Dong eVertInterp3MC(&temp2[2][0], 21, pred, pred_pitch, blkwidth, blkheight, dy); 1761cc31e629e8132df390ae692873c847d1c2f62c0James Dong } 1771cc31e629e8132df390ae692873c847d1c2f62c0James Dong else 1781cc31e629e8132df390ae692873c847d1c2f62c0James Dong { /* diagonal *//* e,g,p,r */ 1791cc31e629e8132df390ae692873c847d1c2f62c0James Dong 1801cc31e629e8132df390ae692873c847d1c2f62c0James Dong ref2 = ref + (y_pos + (dy / 2)) * 
picpitch + x_pos; 1811cc31e629e8132df390ae692873c847d1c2f62c0James Dong 1821cc31e629e8132df390ae692873c847d1c2f62c0James Dong ref += (y_pos * picpitch) + x_pos + (dx / 2); 1831cc31e629e8132df390ae692873c847d1c2f62c0James Dong 1841cc31e629e8132df390ae692873c847d1c2f62c0James Dong eDiagonalInterpMC(ref2, ref, picpitch, pred, pred_pitch, blkwidth, blkheight); 1851cc31e629e8132df390ae692873c847d1c2f62c0James Dong } 1861cc31e629e8132df390ae692873c847d1c2f62c0James Dong 1871cc31e629e8132df390ae692873c847d1c2f62c0James Dong return ; 1881cc31e629e8132df390ae692873c847d1c2f62c0James Dong} 1891cc31e629e8132df390ae692873c847d1c2f62c0James Dong 1901cc31e629e8132df390ae692873c847d1c2f62c0James Dongvoid eCreateAlign(uint8 *ref, int picpitch, int y_pos, 1911cc31e629e8132df390ae692873c847d1c2f62c0James Dong uint8 *out, int blkwidth, int blkheight) 1921cc31e629e8132df390ae692873c847d1c2f62c0James Dong{ 1931cc31e629e8132df390ae692873c847d1c2f62c0James Dong int i, j; 1941cc31e629e8132df390ae692873c847d1c2f62c0James Dong int offset, out_offset; 1951cc31e629e8132df390ae692873c847d1c2f62c0James Dong uint32 prev_pix, result, pix1, pix2, pix4; 1961cc31e629e8132df390ae692873c847d1c2f62c0James Dong 1971cc31e629e8132df390ae692873c847d1c2f62c0James Dong ref += y_pos * picpitch;// + x_pos; 1981cc31e629e8132df390ae692873c847d1c2f62c0James Dong out_offset = 24 - blkwidth; 1991cc31e629e8132df390ae692873c847d1c2f62c0James Dong 2001cc31e629e8132df390ae692873c847d1c2f62c0James Dong //switch(x_pos&0x3){ 2011cc31e629e8132df390ae692873c847d1c2f62c0James Dong switch (((uint32)ref)&0x3) 2021cc31e629e8132df390ae692873c847d1c2f62c0James Dong { 2031cc31e629e8132df390ae692873c847d1c2f62c0James Dong case 1: 2041cc31e629e8132df390ae692873c847d1c2f62c0James Dong offset = picpitch - blkwidth - 3; 2051cc31e629e8132df390ae692873c847d1c2f62c0James Dong for (j = 0; j < blkheight; j++) 2061cc31e629e8132df390ae692873c847d1c2f62c0James Dong { 2071cc31e629e8132df390ae692873c847d1c2f62c0James Dong pix1 = *ref++; 
2081cc31e629e8132df390ae692873c847d1c2f62c0James Dong pix2 = *((uint16*)ref); 2091cc31e629e8132df390ae692873c847d1c2f62c0James Dong ref += 2; 2101cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (pix2 << 8) | pix1; 2111cc31e629e8132df390ae692873c847d1c2f62c0James Dong 2121cc31e629e8132df390ae692873c847d1c2f62c0James Dong for (i = 3; i < blkwidth; i += 4) 2131cc31e629e8132df390ae692873c847d1c2f62c0James Dong { 2141cc31e629e8132df390ae692873c847d1c2f62c0James Dong pix4 = *((uint32*)ref); 2151cc31e629e8132df390ae692873c847d1c2f62c0James Dong ref += 4; 2161cc31e629e8132df390ae692873c847d1c2f62c0James Dong prev_pix = (pix4 << 24) & 0xFF000000; /* mask out byte belong to previous word */ 2171cc31e629e8132df390ae692873c847d1c2f62c0James Dong result |= prev_pix; 2181cc31e629e8132df390ae692873c847d1c2f62c0James Dong *((uint32*)out) = result; /* write 4 bytes */ 2191cc31e629e8132df390ae692873c847d1c2f62c0James Dong out += 4; 2201cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = pix4 >> 8; /* for the next loop */ 2211cc31e629e8132df390ae692873c847d1c2f62c0James Dong } 2221cc31e629e8132df390ae692873c847d1c2f62c0James Dong ref += offset; 2231cc31e629e8132df390ae692873c847d1c2f62c0James Dong out += out_offset; 2241cc31e629e8132df390ae692873c847d1c2f62c0James Dong } 2251cc31e629e8132df390ae692873c847d1c2f62c0James Dong break; 2261cc31e629e8132df390ae692873c847d1c2f62c0James Dong case 2: 2271cc31e629e8132df390ae692873c847d1c2f62c0James Dong offset = picpitch - blkwidth - 2; 2281cc31e629e8132df390ae692873c847d1c2f62c0James Dong for (j = 0; j < blkheight; j++) 2291cc31e629e8132df390ae692873c847d1c2f62c0James Dong { 2301cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = *((uint16*)ref); 2311cc31e629e8132df390ae692873c847d1c2f62c0James Dong ref += 2; 2321cc31e629e8132df390ae692873c847d1c2f62c0James Dong for (i = 2; i < blkwidth; i += 4) 2331cc31e629e8132df390ae692873c847d1c2f62c0James Dong { 2341cc31e629e8132df390ae692873c847d1c2f62c0James Dong pix4 = 
*((uint32*)ref); 2351cc31e629e8132df390ae692873c847d1c2f62c0James Dong ref += 4; 2361cc31e629e8132df390ae692873c847d1c2f62c0James Dong prev_pix = (pix4 << 16) & 0xFFFF0000; /* mask out byte belong to previous word */ 2371cc31e629e8132df390ae692873c847d1c2f62c0James Dong result |= prev_pix; 2381cc31e629e8132df390ae692873c847d1c2f62c0James Dong *((uint32*)out) = result; /* write 4 bytes */ 2391cc31e629e8132df390ae692873c847d1c2f62c0James Dong out += 4; 2401cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = pix4 >> 16; /* for the next loop */ 2411cc31e629e8132df390ae692873c847d1c2f62c0James Dong } 2421cc31e629e8132df390ae692873c847d1c2f62c0James Dong ref += offset; 2431cc31e629e8132df390ae692873c847d1c2f62c0James Dong out += out_offset; 2441cc31e629e8132df390ae692873c847d1c2f62c0James Dong } 2451cc31e629e8132df390ae692873c847d1c2f62c0James Dong break; 2461cc31e629e8132df390ae692873c847d1c2f62c0James Dong case 3: 2471cc31e629e8132df390ae692873c847d1c2f62c0James Dong offset = picpitch - blkwidth - 1; 2481cc31e629e8132df390ae692873c847d1c2f62c0James Dong for (j = 0; j < blkheight; j++) 2491cc31e629e8132df390ae692873c847d1c2f62c0James Dong { 2501cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = *ref++; 2511cc31e629e8132df390ae692873c847d1c2f62c0James Dong for (i = 1; i < blkwidth; i += 4) 2521cc31e629e8132df390ae692873c847d1c2f62c0James Dong { 2531cc31e629e8132df390ae692873c847d1c2f62c0James Dong pix4 = *((uint32*)ref); 2541cc31e629e8132df390ae692873c847d1c2f62c0James Dong ref += 4; 2551cc31e629e8132df390ae692873c847d1c2f62c0James Dong prev_pix = (pix4 << 8) & 0xFFFFFF00; /* mask out byte belong to previous word */ 2561cc31e629e8132df390ae692873c847d1c2f62c0James Dong result |= prev_pix; 2571cc31e629e8132df390ae692873c847d1c2f62c0James Dong *((uint32*)out) = result; /* write 4 bytes */ 2581cc31e629e8132df390ae692873c847d1c2f62c0James Dong out += 4; 2591cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = pix4 >> 24; /* for the next loop */ 
2601cc31e629e8132df390ae692873c847d1c2f62c0James Dong } 2611cc31e629e8132df390ae692873c847d1c2f62c0James Dong ref += offset; 2621cc31e629e8132df390ae692873c847d1c2f62c0James Dong out += out_offset; 2631cc31e629e8132df390ae692873c847d1c2f62c0James Dong } 2641cc31e629e8132df390ae692873c847d1c2f62c0James Dong break; 2651cc31e629e8132df390ae692873c847d1c2f62c0James Dong } 2661cc31e629e8132df390ae692873c847d1c2f62c0James Dong} 2671cc31e629e8132df390ae692873c847d1c2f62c0James Dong 2681cc31e629e8132df390ae692873c847d1c2f62c0James Dongvoid eHorzInterp1MC(uint8 *in, int inpitch, uint8 *out, int outpitch, 2691cc31e629e8132df390ae692873c847d1c2f62c0James Dong int blkwidth, int blkheight, int dx) 2701cc31e629e8132df390ae692873c847d1c2f62c0James Dong{ 2711cc31e629e8132df390ae692873c847d1c2f62c0James Dong uint8 *p_ref; 2721cc31e629e8132df390ae692873c847d1c2f62c0James Dong uint32 *p_cur; 2731cc31e629e8132df390ae692873c847d1c2f62c0James Dong uint32 tmp, pkres; 2741cc31e629e8132df390ae692873c847d1c2f62c0James Dong int result, curr_offset, ref_offset; 2751cc31e629e8132df390ae692873c847d1c2f62c0James Dong int j; 2761cc31e629e8132df390ae692873c847d1c2f62c0James Dong int32 r0, r1, r2, r3, r4, r5; 2771cc31e629e8132df390ae692873c847d1c2f62c0James Dong int32 r13, r6; 2781cc31e629e8132df390ae692873c847d1c2f62c0James Dong 2791cc31e629e8132df390ae692873c847d1c2f62c0James Dong p_cur = (uint32*)out; /* assume it's word aligned */ 2801cc31e629e8132df390ae692873c847d1c2f62c0James Dong curr_offset = (outpitch - blkwidth) >> 2; 2811cc31e629e8132df390ae692873c847d1c2f62c0James Dong p_ref = in; 2821cc31e629e8132df390ae692873c847d1c2f62c0James Dong ref_offset = inpitch - blkwidth; 2831cc31e629e8132df390ae692873c847d1c2f62c0James Dong 2841cc31e629e8132df390ae692873c847d1c2f62c0James Dong if (dx&1) 2851cc31e629e8132df390ae692873c847d1c2f62c0James Dong { 2861cc31e629e8132df390ae692873c847d1c2f62c0James Dong dx = ((dx >> 1) ? 
-3 : -4); /* use in 3/4 pel */ 2871cc31e629e8132df390ae692873c847d1c2f62c0James Dong p_ref -= 2; 2881cc31e629e8132df390ae692873c847d1c2f62c0James Dong r13 = 0; 2891cc31e629e8132df390ae692873c847d1c2f62c0James Dong for (j = blkheight; j > 0; j--) 2901cc31e629e8132df390ae692873c847d1c2f62c0James Dong { 2911cc31e629e8132df390ae692873c847d1c2f62c0James Dong tmp = (uint32)(p_ref + blkwidth); 2921cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 = p_ref[0]; 2931cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 = p_ref[2]; 2941cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 |= (r1 << 16); /* 0,c,0,a */ 2951cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 = p_ref[1]; 2961cc31e629e8132df390ae692873c847d1c2f62c0James Dong r2 = p_ref[3]; 2971cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 |= (r2 << 16); /* 0,d,0,b */ 2981cc31e629e8132df390ae692873c847d1c2f62c0James Dong while ((uint32)p_ref < tmp) 2991cc31e629e8132df390ae692873c847d1c2f62c0James Dong { 3001cc31e629e8132df390ae692873c847d1c2f62c0James Dong r2 = *(p_ref += 4); /* move pointer to e */ 3011cc31e629e8132df390ae692873c847d1c2f62c0James Dong r3 = p_ref[2]; 3021cc31e629e8132df390ae692873c847d1c2f62c0James Dong r2 |= (r3 << 16); /* 0,g,0,e */ 3031cc31e629e8132df390ae692873c847d1c2f62c0James Dong r3 = p_ref[1]; 3041cc31e629e8132df390ae692873c847d1c2f62c0James Dong r4 = p_ref[3]; 3051cc31e629e8132df390ae692873c847d1c2f62c0James Dong r3 |= (r4 << 16); /* 0,h,0,f */ 3061cc31e629e8132df390ae692873c847d1c2f62c0James Dong 3071cc31e629e8132df390ae692873c847d1c2f62c0James Dong r4 = r0 + r3; /* c+h, a+f */ 3081cc31e629e8132df390ae692873c847d1c2f62c0James Dong r5 = r0 + r1; /* c+d, a+b */ 3091cc31e629e8132df390ae692873c847d1c2f62c0James Dong r6 = r2 + r3; /* g+h, e+f */ 3101cc31e629e8132df390ae692873c847d1c2f62c0James Dong r5 >>= 16; 3111cc31e629e8132df390ae692873c847d1c2f62c0James Dong r5 |= (r6 << 16); /* e+f, c+d */ 3121cc31e629e8132df390ae692873c847d1c2f62c0James Dong r4 += r5 * 20; /* c+20*e+20*f+h, 
a+20*c+20*d+f */ 3131cc31e629e8132df390ae692873c847d1c2f62c0James Dong r4 += 0x100010; /* +16, +16 */ 3141cc31e629e8132df390ae692873c847d1c2f62c0James Dong r5 = r1 + r2; /* d+g, b+e */ 3151cc31e629e8132df390ae692873c847d1c2f62c0James Dong r4 -= r5 * 5; /* c-5*d+20*e+20*f-5*g+h, a-5*b+20*c+20*d-5*e+f */ 3161cc31e629e8132df390ae692873c847d1c2f62c0James Dong r4 >>= 5; 3171cc31e629e8132df390ae692873c847d1c2f62c0James Dong r13 |= r4; /* check clipping */ 3181cc31e629e8132df390ae692873c847d1c2f62c0James Dong 3191cc31e629e8132df390ae692873c847d1c2f62c0James Dong r5 = p_ref[dx+2]; 3201cc31e629e8132df390ae692873c847d1c2f62c0James Dong r6 = p_ref[dx+4]; 3211cc31e629e8132df390ae692873c847d1c2f62c0James Dong r5 |= (r6 << 16); 3221cc31e629e8132df390ae692873c847d1c2f62c0James Dong r4 += r5; 3231cc31e629e8132df390ae692873c847d1c2f62c0James Dong r4 += 0x10001; 3241cc31e629e8132df390ae692873c847d1c2f62c0James Dong r4 = (r4 >> 1) & 0xFF00FF; 3251cc31e629e8132df390ae692873c847d1c2f62c0James Dong 3261cc31e629e8132df390ae692873c847d1c2f62c0James Dong r5 = p_ref[4]; /* i */ 3271cc31e629e8132df390ae692873c847d1c2f62c0James Dong r6 = (r5 << 16); 3281cc31e629e8132df390ae692873c847d1c2f62c0James Dong r5 = r6 | (r2 >> 16);/* 0,i,0,g */ 3291cc31e629e8132df390ae692873c847d1c2f62c0James Dong r5 += r1; /* d+i, b+g */ /* r5 not free */ 3301cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 >>= 16; 3311cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 |= (r3 << 16); /* 0,f,0,d */ /* r1 has changed */ 3321cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 += r2; /* f+g, d+e */ 3331cc31e629e8132df390ae692873c847d1c2f62c0James Dong r5 += 20 * r1; /* d+20f+20g+i, b+20d+20e+g */ 3341cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 >>= 16; 3351cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 |= (r2 << 16); /* 0,e,0,c */ /* r0 has changed */ 3361cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 += r3; /* e+h, c+f */ 3371cc31e629e8132df390ae692873c847d1c2f62c0James Dong r5 += 0x100010; 
/* 16,16 */ 3381cc31e629e8132df390ae692873c847d1c2f62c0James Dong r5 -= r0 * 5; /* d-5e+20f+20g-5h+i, b-5c+20d+20e-5f+g */ 3391cc31e629e8132df390ae692873c847d1c2f62c0James Dong r5 >>= 5; 3401cc31e629e8132df390ae692873c847d1c2f62c0James Dong r13 |= r5; /* check clipping */ 3411cc31e629e8132df390ae692873c847d1c2f62c0James Dong 3421cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 = p_ref[dx+3]; 3431cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 = p_ref[dx+5]; 3441cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 |= (r1 << 16); 3451cc31e629e8132df390ae692873c847d1c2f62c0James Dong r5 += r0; 3461cc31e629e8132df390ae692873c847d1c2f62c0James Dong r5 += 0x10001; 3471cc31e629e8132df390ae692873c847d1c2f62c0James Dong r5 = (r5 >> 1) & 0xFF00FF; 3481cc31e629e8132df390ae692873c847d1c2f62c0James Dong 3491cc31e629e8132df390ae692873c847d1c2f62c0James Dong r4 |= (r5 << 8); /* pack them together */ 3501cc31e629e8132df390ae692873c847d1c2f62c0James Dong *p_cur++ = r4; 3511cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 = r3; 3521cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 = r2; 3531cc31e629e8132df390ae692873c847d1c2f62c0James Dong } 3541cc31e629e8132df390ae692873c847d1c2f62c0James Dong p_cur += curr_offset; /* move to the next line */ 3551cc31e629e8132df390ae692873c847d1c2f62c0James Dong p_ref += ref_offset; /* ref_offset = inpitch-blkwidth; */ 3561cc31e629e8132df390ae692873c847d1c2f62c0James Dong 3571cc31e629e8132df390ae692873c847d1c2f62c0James Dong if (r13&0xFF000700) /* need clipping */ 3581cc31e629e8132df390ae692873c847d1c2f62c0James Dong { 3591cc31e629e8132df390ae692873c847d1c2f62c0James Dong /* move back to the beginning of the line */ 3601cc31e629e8132df390ae692873c847d1c2f62c0James Dong p_ref -= (ref_offset + blkwidth); /* input */ 3611cc31e629e8132df390ae692873c847d1c2f62c0James Dong p_cur -= (outpitch >> 2); 3621cc31e629e8132df390ae692873c847d1c2f62c0James Dong 3631cc31e629e8132df390ae692873c847d1c2f62c0James Dong tmp = (uint32)(p_ref + blkwidth); 
3641cc31e629e8132df390ae692873c847d1c2f62c0James Dong for (; (uint32)p_ref < tmp;) 3651cc31e629e8132df390ae692873c847d1c2f62c0James Dong { 3661cc31e629e8132df390ae692873c847d1c2f62c0James Dong 3671cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 = *p_ref++; 3681cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 = *p_ref++; 3691cc31e629e8132df390ae692873c847d1c2f62c0James Dong r2 = *p_ref++; 3701cc31e629e8132df390ae692873c847d1c2f62c0James Dong r3 = *p_ref++; 3711cc31e629e8132df390ae692873c847d1c2f62c0James Dong r4 = *p_ref++; 3721cc31e629e8132df390ae692873c847d1c2f62c0James Dong /* first pixel */ 3731cc31e629e8132df390ae692873c847d1c2f62c0James Dong r5 = *p_ref++; 3741cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (r0 + r5); 3751cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 = (r1 + r4); 3761cc31e629e8132df390ae692873c847d1c2f62c0James Dong result -= (r0 * 5);//result -= r0; result -= (r0<<2); 3771cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 = (r2 + r3); 3781cc31e629e8132df390ae692873c847d1c2f62c0James Dong result += (r0 * 20);//result += (r0<<4); result += (r0<<2); 3791cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (result + 16) >> 5; 3801cc31e629e8132df390ae692873c847d1c2f62c0James Dong CLIP_RESULT(result) 3811cc31e629e8132df390ae692873c847d1c2f62c0James Dong /* 3/4 pel, no need to clip */ 3821cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (result + p_ref[dx] + 1); 3831cc31e629e8132df390ae692873c847d1c2f62c0James Dong pkres = (result >> 1) ; 3841cc31e629e8132df390ae692873c847d1c2f62c0James Dong /* second pixel */ 3851cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 = *p_ref++; 3861cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (r1 + r0); 3871cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 = (r2 + r5); 3881cc31e629e8132df390ae692873c847d1c2f62c0James Dong result -= (r1 * 5);//result -= r1; result -= (r1<<2); 3891cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 = (r3 + r4); 
3901cc31e629e8132df390ae692873c847d1c2f62c0James Dong result += (r1 * 20);//result += (r1<<4); result += (r1<<2); 3911cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (result + 16) >> 5; 3921cc31e629e8132df390ae692873c847d1c2f62c0James Dong CLIP_RESULT(result) 3931cc31e629e8132df390ae692873c847d1c2f62c0James Dong /* 3/4 pel, no need to clip */ 3941cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (result + p_ref[dx] + 1); 3951cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (result >> 1); 3961cc31e629e8132df390ae692873c847d1c2f62c0James Dong pkres |= (result << 8); 3971cc31e629e8132df390ae692873c847d1c2f62c0James Dong /* third pixel */ 3981cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 = *p_ref++; 3991cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (r2 + r1); 4001cc31e629e8132df390ae692873c847d1c2f62c0James Dong r2 = (r3 + r0); 4011cc31e629e8132df390ae692873c847d1c2f62c0James Dong result -= (r2 * 5);//result -= r2; result -= (r2<<2); 4021cc31e629e8132df390ae692873c847d1c2f62c0James Dong r2 = (r4 + r5); 4031cc31e629e8132df390ae692873c847d1c2f62c0James Dong result += (r2 * 20);//result += (r2<<4); result += (r2<<2); 4041cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (result + 16) >> 5; 4051cc31e629e8132df390ae692873c847d1c2f62c0James Dong CLIP_RESULT(result) 4061cc31e629e8132df390ae692873c847d1c2f62c0James Dong /* 3/4 pel, no need to clip */ 4071cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (result + p_ref[dx] + 1); 4081cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (result >> 1); 4091cc31e629e8132df390ae692873c847d1c2f62c0James Dong pkres |= (result << 16); 4101cc31e629e8132df390ae692873c847d1c2f62c0James Dong /* fourth pixel */ 4111cc31e629e8132df390ae692873c847d1c2f62c0James Dong r2 = *p_ref++; 4121cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (r3 + r2); 4131cc31e629e8132df390ae692873c847d1c2f62c0James Dong r3 = (r4 + r1); 
4141cc31e629e8132df390ae692873c847d1c2f62c0James Dong result -= (r3 * 5);//result -= r3; result -= (r3<<2); 4151cc31e629e8132df390ae692873c847d1c2f62c0James Dong r3 = (r5 + r0); 4161cc31e629e8132df390ae692873c847d1c2f62c0James Dong result += (r3 * 20);//result += (r3<<4); result += (r3<<2); 4171cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (result + 16) >> 5; 4181cc31e629e8132df390ae692873c847d1c2f62c0James Dong CLIP_RESULT(result) 4191cc31e629e8132df390ae692873c847d1c2f62c0James Dong /* 3/4 pel, no need to clip */ 4201cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (result + p_ref[dx] + 1); 4211cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (result >> 1); 4221cc31e629e8132df390ae692873c847d1c2f62c0James Dong pkres |= (result << 24); 4231cc31e629e8132df390ae692873c847d1c2f62c0James Dong *p_cur++ = pkres; /* write 4 pixels */ 4241cc31e629e8132df390ae692873c847d1c2f62c0James Dong p_ref -= 5; /* offset back to the middle of filter */ 4251cc31e629e8132df390ae692873c847d1c2f62c0James Dong } 4261cc31e629e8132df390ae692873c847d1c2f62c0James Dong p_cur += curr_offset; /* move to the next line */ 4271cc31e629e8132df390ae692873c847d1c2f62c0James Dong p_ref += ref_offset; /* move to the next line */ 4281cc31e629e8132df390ae692873c847d1c2f62c0James Dong } 4291cc31e629e8132df390ae692873c847d1c2f62c0James Dong } 4301cc31e629e8132df390ae692873c847d1c2f62c0James Dong } 4311cc31e629e8132df390ae692873c847d1c2f62c0James Dong else 4321cc31e629e8132df390ae692873c847d1c2f62c0James Dong { 4331cc31e629e8132df390ae692873c847d1c2f62c0James Dong p_ref -= 2; 4341cc31e629e8132df390ae692873c847d1c2f62c0James Dong r13 = 0; 4351cc31e629e8132df390ae692873c847d1c2f62c0James Dong for (j = blkheight; j > 0; j--) 4361cc31e629e8132df390ae692873c847d1c2f62c0James Dong { 4371cc31e629e8132df390ae692873c847d1c2f62c0James Dong tmp = (uint32)(p_ref + blkwidth); 4381cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 = p_ref[0]; 
4391cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 = p_ref[2]; 4401cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 |= (r1 << 16); /* 0,c,0,a */ 4411cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 = p_ref[1]; 4421cc31e629e8132df390ae692873c847d1c2f62c0James Dong r2 = p_ref[3]; 4431cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 |= (r2 << 16); /* 0,d,0,b */ 4441cc31e629e8132df390ae692873c847d1c2f62c0James Dong while ((uint32)p_ref < tmp) 4451cc31e629e8132df390ae692873c847d1c2f62c0James Dong { 4461cc31e629e8132df390ae692873c847d1c2f62c0James Dong r2 = *(p_ref += 4); /* move pointer to e */ 4471cc31e629e8132df390ae692873c847d1c2f62c0James Dong r3 = p_ref[2]; 4481cc31e629e8132df390ae692873c847d1c2f62c0James Dong r2 |= (r3 << 16); /* 0,g,0,e */ 4491cc31e629e8132df390ae692873c847d1c2f62c0James Dong r3 = p_ref[1]; 4501cc31e629e8132df390ae692873c847d1c2f62c0James Dong r4 = p_ref[3]; 4511cc31e629e8132df390ae692873c847d1c2f62c0James Dong r3 |= (r4 << 16); /* 0,h,0,f */ 4521cc31e629e8132df390ae692873c847d1c2f62c0James Dong 4531cc31e629e8132df390ae692873c847d1c2f62c0James Dong r4 = r0 + r3; /* c+h, a+f */ 4541cc31e629e8132df390ae692873c847d1c2f62c0James Dong r5 = r0 + r1; /* c+d, a+b */ 4551cc31e629e8132df390ae692873c847d1c2f62c0James Dong r6 = r2 + r3; /* g+h, e+f */ 4561cc31e629e8132df390ae692873c847d1c2f62c0James Dong r5 >>= 16; 4571cc31e629e8132df390ae692873c847d1c2f62c0James Dong r5 |= (r6 << 16); /* e+f, c+d */ 4581cc31e629e8132df390ae692873c847d1c2f62c0James Dong r4 += r5 * 20; /* c+20*e+20*f+h, a+20*c+20*d+f */ 4591cc31e629e8132df390ae692873c847d1c2f62c0James Dong r4 += 0x100010; /* +16, +16 */ 4601cc31e629e8132df390ae692873c847d1c2f62c0James Dong r5 = r1 + r2; /* d+g, b+e */ 4611cc31e629e8132df390ae692873c847d1c2f62c0James Dong r4 -= r5 * 5; /* c-5*d+20*e+20*f-5*g+h, a-5*b+20*c+20*d-5*e+f */ 4621cc31e629e8132df390ae692873c847d1c2f62c0James Dong r4 >>= 5; 4631cc31e629e8132df390ae692873c847d1c2f62c0James Dong r13 |= r4; /* check clipping */ 
4641cc31e629e8132df390ae692873c847d1c2f62c0James Dong r4 &= 0xFF00FF; /* mask */ 4651cc31e629e8132df390ae692873c847d1c2f62c0James Dong 4661cc31e629e8132df390ae692873c847d1c2f62c0James Dong r5 = p_ref[4]; /* i */ 4671cc31e629e8132df390ae692873c847d1c2f62c0James Dong r6 = (r5 << 16); 4681cc31e629e8132df390ae692873c847d1c2f62c0James Dong r5 = r6 | (r2 >> 16);/* 0,i,0,g */ 4691cc31e629e8132df390ae692873c847d1c2f62c0James Dong r5 += r1; /* d+i, b+g */ /* r5 not free */ 4701cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 >>= 16; 4711cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 |= (r3 << 16); /* 0,f,0,d */ /* r1 has changed */ 4721cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 += r2; /* f+g, d+e */ 4731cc31e629e8132df390ae692873c847d1c2f62c0James Dong r5 += 20 * r1; /* d+20f+20g+i, b+20d+20e+g */ 4741cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 >>= 16; 4751cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 |= (r2 << 16); /* 0,e,0,c */ /* r0 has changed */ 4761cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 += r3; /* e+h, c+f */ 4771cc31e629e8132df390ae692873c847d1c2f62c0James Dong r5 += 0x100010; /* 16,16 */ 4781cc31e629e8132df390ae692873c847d1c2f62c0James Dong r5 -= r0 * 5; /* d-5e+20f+20g-5h+i, b-5c+20d+20e-5f+g */ 4791cc31e629e8132df390ae692873c847d1c2f62c0James Dong r5 >>= 5; 4801cc31e629e8132df390ae692873c847d1c2f62c0James Dong r13 |= r5; /* check clipping */ 4811cc31e629e8132df390ae692873c847d1c2f62c0James Dong r5 &= 0xFF00FF; /* mask */ 4821cc31e629e8132df390ae692873c847d1c2f62c0James Dong 4831cc31e629e8132df390ae692873c847d1c2f62c0James Dong r4 |= (r5 << 8); /* pack them together */ 4841cc31e629e8132df390ae692873c847d1c2f62c0James Dong *p_cur++ = r4; 4851cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 = r3; 4861cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 = r2; 4871cc31e629e8132df390ae692873c847d1c2f62c0James Dong } 4881cc31e629e8132df390ae692873c847d1c2f62c0James Dong p_cur += curr_offset; /* move to the next line */ 
4891cc31e629e8132df390ae692873c847d1c2f62c0James Dong p_ref += ref_offset; /* ref_offset = inpitch-blkwidth; */ 4901cc31e629e8132df390ae692873c847d1c2f62c0James Dong 4911cc31e629e8132df390ae692873c847d1c2f62c0James Dong if (r13&0xFF000700) /* need clipping */ 4921cc31e629e8132df390ae692873c847d1c2f62c0James Dong { 4931cc31e629e8132df390ae692873c847d1c2f62c0James Dong /* move back to the beginning of the line */ 4941cc31e629e8132df390ae692873c847d1c2f62c0James Dong p_ref -= (ref_offset + blkwidth); /* input */ 4951cc31e629e8132df390ae692873c847d1c2f62c0James Dong p_cur -= (outpitch >> 2); 4961cc31e629e8132df390ae692873c847d1c2f62c0James Dong 4971cc31e629e8132df390ae692873c847d1c2f62c0James Dong tmp = (uint32)(p_ref + blkwidth); 4981cc31e629e8132df390ae692873c847d1c2f62c0James Dong for (; (uint32)p_ref < tmp;) 4991cc31e629e8132df390ae692873c847d1c2f62c0James Dong { 5001cc31e629e8132df390ae692873c847d1c2f62c0James Dong 5011cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 = *p_ref++; 5021cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 = *p_ref++; 5031cc31e629e8132df390ae692873c847d1c2f62c0James Dong r2 = *p_ref++; 5041cc31e629e8132df390ae692873c847d1c2f62c0James Dong r3 = *p_ref++; 5051cc31e629e8132df390ae692873c847d1c2f62c0James Dong r4 = *p_ref++; 5061cc31e629e8132df390ae692873c847d1c2f62c0James Dong /* first pixel */ 5071cc31e629e8132df390ae692873c847d1c2f62c0James Dong r5 = *p_ref++; 5081cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (r0 + r5); 5091cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 = (r1 + r4); 5101cc31e629e8132df390ae692873c847d1c2f62c0James Dong result -= (r0 * 5);//result -= r0; result -= (r0<<2); 5111cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 = (r2 + r3); 5121cc31e629e8132df390ae692873c847d1c2f62c0James Dong result += (r0 * 20);//result += (r0<<4); result += (r0<<2); 5131cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (result + 16) >> 5; 5141cc31e629e8132df390ae692873c847d1c2f62c0James Dong 
CLIP_RESULT(result) 5151cc31e629e8132df390ae692873c847d1c2f62c0James Dong pkres = result; 5161cc31e629e8132df390ae692873c847d1c2f62c0James Dong /* second pixel */ 5171cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 = *p_ref++; 5181cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (r1 + r0); 5191cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 = (r2 + r5); 5201cc31e629e8132df390ae692873c847d1c2f62c0James Dong result -= (r1 * 5);//result -= r1; result -= (r1<<2); 5211cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 = (r3 + r4); 5221cc31e629e8132df390ae692873c847d1c2f62c0James Dong result += (r1 * 20);//result += (r1<<4); result += (r1<<2); 5231cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (result + 16) >> 5; 5241cc31e629e8132df390ae692873c847d1c2f62c0James Dong CLIP_RESULT(result) 5251cc31e629e8132df390ae692873c847d1c2f62c0James Dong pkres |= (result << 8); 5261cc31e629e8132df390ae692873c847d1c2f62c0James Dong /* third pixel */ 5271cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 = *p_ref++; 5281cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (r2 + r1); 5291cc31e629e8132df390ae692873c847d1c2f62c0James Dong r2 = (r3 + r0); 5301cc31e629e8132df390ae692873c847d1c2f62c0James Dong result -= (r2 * 5);//result -= r2; result -= (r2<<2); 5311cc31e629e8132df390ae692873c847d1c2f62c0James Dong r2 = (r4 + r5); 5321cc31e629e8132df390ae692873c847d1c2f62c0James Dong result += (r2 * 20);//result += (r2<<4); result += (r2<<2); 5331cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (result + 16) >> 5; 5341cc31e629e8132df390ae692873c847d1c2f62c0James Dong CLIP_RESULT(result) 5351cc31e629e8132df390ae692873c847d1c2f62c0James Dong pkres |= (result << 16); 5361cc31e629e8132df390ae692873c847d1c2f62c0James Dong /* fourth pixel */ 5371cc31e629e8132df390ae692873c847d1c2f62c0James Dong r2 = *p_ref++; 5381cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (r3 + r2); 5391cc31e629e8132df390ae692873c847d1c2f62c0James Dong r3 = (r4 + 
r1); 5401cc31e629e8132df390ae692873c847d1c2f62c0James Dong result -= (r3 * 5);//result -= r3; result -= (r3<<2); 5411cc31e629e8132df390ae692873c847d1c2f62c0James Dong r3 = (r5 + r0); 5421cc31e629e8132df390ae692873c847d1c2f62c0James Dong result += (r3 * 20);//result += (r3<<4); result += (r3<<2); 5431cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (result + 16) >> 5; 5441cc31e629e8132df390ae692873c847d1c2f62c0James Dong CLIP_RESULT(result) 5451cc31e629e8132df390ae692873c847d1c2f62c0James Dong pkres |= (result << 24); 5461cc31e629e8132df390ae692873c847d1c2f62c0James Dong *p_cur++ = pkres; /* write 4 pixels */ 5471cc31e629e8132df390ae692873c847d1c2f62c0James Dong p_ref -= 5; 5481cc31e629e8132df390ae692873c847d1c2f62c0James Dong } 5491cc31e629e8132df390ae692873c847d1c2f62c0James Dong p_cur += curr_offset; /* move to the next line */ 5501cc31e629e8132df390ae692873c847d1c2f62c0James Dong p_ref += ref_offset; 5511cc31e629e8132df390ae692873c847d1c2f62c0James Dong } 5521cc31e629e8132df390ae692873c847d1c2f62c0James Dong } 5531cc31e629e8132df390ae692873c847d1c2f62c0James Dong } 5541cc31e629e8132df390ae692873c847d1c2f62c0James Dong 5551cc31e629e8132df390ae692873c847d1c2f62c0James Dong return ; 5561cc31e629e8132df390ae692873c847d1c2f62c0James Dong} 5571cc31e629e8132df390ae692873c847d1c2f62c0James Dong 5581cc31e629e8132df390ae692873c847d1c2f62c0James Dongvoid eHorzInterp2MC(int *in, int inpitch, uint8 *out, int outpitch, 5591cc31e629e8132df390ae692873c847d1c2f62c0James Dong int blkwidth, int blkheight, int dx) 5601cc31e629e8132df390ae692873c847d1c2f62c0James Dong{ 5611cc31e629e8132df390ae692873c847d1c2f62c0James Dong int *p_ref; 5621cc31e629e8132df390ae692873c847d1c2f62c0James Dong uint32 *p_cur; 5631cc31e629e8132df390ae692873c847d1c2f62c0James Dong uint32 tmp, pkres; 5641cc31e629e8132df390ae692873c847d1c2f62c0James Dong int result, result2, curr_offset, ref_offset; 5651cc31e629e8132df390ae692873c847d1c2f62c0James Dong int j, r0, r1, r2, r3, r4, r5; 
5661cc31e629e8132df390ae692873c847d1c2f62c0James Dong 5671cc31e629e8132df390ae692873c847d1c2f62c0James Dong p_cur = (uint32*)out; /* assume it's word aligned */ 5681cc31e629e8132df390ae692873c847d1c2f62c0James Dong curr_offset = (outpitch - blkwidth) >> 2; 5691cc31e629e8132df390ae692873c847d1c2f62c0James Dong p_ref = in; 5701cc31e629e8132df390ae692873c847d1c2f62c0James Dong ref_offset = inpitch - blkwidth; 5711cc31e629e8132df390ae692873c847d1c2f62c0James Dong 5721cc31e629e8132df390ae692873c847d1c2f62c0James Dong if (dx&1) 5731cc31e629e8132df390ae692873c847d1c2f62c0James Dong { 5741cc31e629e8132df390ae692873c847d1c2f62c0James Dong dx = ((dx >> 1) ? -3 : -4); /* use in 3/4 pel */ 5751cc31e629e8132df390ae692873c847d1c2f62c0James Dong 5761cc31e629e8132df390ae692873c847d1c2f62c0James Dong for (j = blkheight; j > 0 ; j--) 5771cc31e629e8132df390ae692873c847d1c2f62c0James Dong { 5781cc31e629e8132df390ae692873c847d1c2f62c0James Dong tmp = (uint32)(p_ref + blkwidth); 5791cc31e629e8132df390ae692873c847d1c2f62c0James Dong for (; (uint32)p_ref < tmp;) 5801cc31e629e8132df390ae692873c847d1c2f62c0James Dong { 5811cc31e629e8132df390ae692873c847d1c2f62c0James Dong 5821cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 = p_ref[-2]; 5831cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 = p_ref[-1]; 5841cc31e629e8132df390ae692873c847d1c2f62c0James Dong r2 = *p_ref++; 5851cc31e629e8132df390ae692873c847d1c2f62c0James Dong r3 = *p_ref++; 5861cc31e629e8132df390ae692873c847d1c2f62c0James Dong r4 = *p_ref++; 5871cc31e629e8132df390ae692873c847d1c2f62c0James Dong /* first pixel */ 5881cc31e629e8132df390ae692873c847d1c2f62c0James Dong r5 = *p_ref++; 5891cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (r0 + r5); 5901cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 = (r1 + r4); 5911cc31e629e8132df390ae692873c847d1c2f62c0James Dong result -= (r0 * 5);//result -= r0; result -= (r0<<2); 5921cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 = (r2 + r3); 
5931cc31e629e8132df390ae692873c847d1c2f62c0James Dong result += (r0 * 20);//result += (r0<<4); result += (r0<<2); 5941cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (result + 512) >> 10; 5951cc31e629e8132df390ae692873c847d1c2f62c0James Dong CLIP_RESULT(result) 5961cc31e629e8132df390ae692873c847d1c2f62c0James Dong result2 = ((p_ref[dx] + 16) >> 5); 5971cc31e629e8132df390ae692873c847d1c2f62c0James Dong CLIP_RESULT(result2) 5981cc31e629e8132df390ae692873c847d1c2f62c0James Dong /* 3/4 pel, no need to clip */ 5991cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (result + result2 + 1); 6001cc31e629e8132df390ae692873c847d1c2f62c0James Dong pkres = (result >> 1); 6011cc31e629e8132df390ae692873c847d1c2f62c0James Dong /* second pixel */ 6021cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 = *p_ref++; 6031cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (r1 + r0); 6041cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 = (r2 + r5); 6051cc31e629e8132df390ae692873c847d1c2f62c0James Dong result -= (r1 * 5);//result -= r1; result -= (r1<<2); 6061cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 = (r3 + r4); 6071cc31e629e8132df390ae692873c847d1c2f62c0James Dong result += (r1 * 20);//result += (r1<<4); result += (r1<<2); 6081cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (result + 512) >> 10; 6091cc31e629e8132df390ae692873c847d1c2f62c0James Dong CLIP_RESULT(result) 6101cc31e629e8132df390ae692873c847d1c2f62c0James Dong result2 = ((p_ref[dx] + 16) >> 5); 6111cc31e629e8132df390ae692873c847d1c2f62c0James Dong CLIP_RESULT(result2) 6121cc31e629e8132df390ae692873c847d1c2f62c0James Dong /* 3/4 pel, no need to clip */ 6131cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (result + result2 + 1); 6141cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (result >> 1); 6151cc31e629e8132df390ae692873c847d1c2f62c0James Dong pkres |= (result << 8); 6161cc31e629e8132df390ae692873c847d1c2f62c0James Dong /* third pixel */ 
6171cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 = *p_ref++; 6181cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (r2 + r1); 6191cc31e629e8132df390ae692873c847d1c2f62c0James Dong r2 = (r3 + r0); 6201cc31e629e8132df390ae692873c847d1c2f62c0James Dong result -= (r2 * 5);//result -= r2; result -= (r2<<2); 6211cc31e629e8132df390ae692873c847d1c2f62c0James Dong r2 = (r4 + r5); 6221cc31e629e8132df390ae692873c847d1c2f62c0James Dong result += (r2 * 20);//result += (r2<<4); result += (r2<<2); 6231cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (result + 512) >> 10; 6241cc31e629e8132df390ae692873c847d1c2f62c0James Dong CLIP_RESULT(result) 6251cc31e629e8132df390ae692873c847d1c2f62c0James Dong result2 = ((p_ref[dx] + 16) >> 5); 6261cc31e629e8132df390ae692873c847d1c2f62c0James Dong CLIP_RESULT(result2) 6271cc31e629e8132df390ae692873c847d1c2f62c0James Dong /* 3/4 pel, no need to clip */ 6281cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (result + result2 + 1); 6291cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (result >> 1); 6301cc31e629e8132df390ae692873c847d1c2f62c0James Dong pkres |= (result << 16); 6311cc31e629e8132df390ae692873c847d1c2f62c0James Dong /* fourth pixel */ 6321cc31e629e8132df390ae692873c847d1c2f62c0James Dong r2 = *p_ref++; 6331cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (r3 + r2); 6341cc31e629e8132df390ae692873c847d1c2f62c0James Dong r3 = (r4 + r1); 6351cc31e629e8132df390ae692873c847d1c2f62c0James Dong result -= (r3 * 5);//result -= r3; result -= (r3<<2); 6361cc31e629e8132df390ae692873c847d1c2f62c0James Dong r3 = (r5 + r0); 6371cc31e629e8132df390ae692873c847d1c2f62c0James Dong result += (r3 * 20);//result += (r3<<4); result += (r3<<2); 6381cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (result + 512) >> 10; 6391cc31e629e8132df390ae692873c847d1c2f62c0James Dong CLIP_RESULT(result) 6401cc31e629e8132df390ae692873c847d1c2f62c0James Dong result2 = ((p_ref[dx] + 16) >> 5); 
6411cc31e629e8132df390ae692873c847d1c2f62c0James Dong CLIP_RESULT(result2) 6421cc31e629e8132df390ae692873c847d1c2f62c0James Dong /* 3/4 pel, no need to clip */ 6431cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (result + result2 + 1); 6441cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (result >> 1); 6451cc31e629e8132df390ae692873c847d1c2f62c0James Dong pkres |= (result << 24); 6461cc31e629e8132df390ae692873c847d1c2f62c0James Dong *p_cur++ = pkres; /* write 4 pixels */ 6471cc31e629e8132df390ae692873c847d1c2f62c0James Dong p_ref -= 3; /* offset back to the middle of filter */ 6481cc31e629e8132df390ae692873c847d1c2f62c0James Dong } 6491cc31e629e8132df390ae692873c847d1c2f62c0James Dong p_cur += curr_offset; /* move to the next line */ 6501cc31e629e8132df390ae692873c847d1c2f62c0James Dong p_ref += ref_offset; /* move to the next line */ 6511cc31e629e8132df390ae692873c847d1c2f62c0James Dong } 6521cc31e629e8132df390ae692873c847d1c2f62c0James Dong } 6531cc31e629e8132df390ae692873c847d1c2f62c0James Dong else 6541cc31e629e8132df390ae692873c847d1c2f62c0James Dong { 6551cc31e629e8132df390ae692873c847d1c2f62c0James Dong for (j = blkheight; j > 0 ; j--) 6561cc31e629e8132df390ae692873c847d1c2f62c0James Dong { 6571cc31e629e8132df390ae692873c847d1c2f62c0James Dong tmp = (uint32)(p_ref + blkwidth); 6581cc31e629e8132df390ae692873c847d1c2f62c0James Dong for (; (uint32)p_ref < tmp;) 6591cc31e629e8132df390ae692873c847d1c2f62c0James Dong { 6601cc31e629e8132df390ae692873c847d1c2f62c0James Dong 6611cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 = p_ref[-2]; 6621cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 = p_ref[-1]; 6631cc31e629e8132df390ae692873c847d1c2f62c0James Dong r2 = *p_ref++; 6641cc31e629e8132df390ae692873c847d1c2f62c0James Dong r3 = *p_ref++; 6651cc31e629e8132df390ae692873c847d1c2f62c0James Dong r4 = *p_ref++; 6661cc31e629e8132df390ae692873c847d1c2f62c0James Dong /* first pixel */ 6671cc31e629e8132df390ae692873c847d1c2f62c0James Dong r5 = 
*p_ref++; 6681cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (r0 + r5); 6691cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 = (r1 + r4); 6701cc31e629e8132df390ae692873c847d1c2f62c0James Dong result -= (r0 * 5);//result -= r0; result -= (r0<<2); 6711cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 = (r2 + r3); 6721cc31e629e8132df390ae692873c847d1c2f62c0James Dong result += (r0 * 20);//result += (r0<<4); result += (r0<<2); 6731cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (result + 512) >> 10; 6741cc31e629e8132df390ae692873c847d1c2f62c0James Dong CLIP_RESULT(result) 6751cc31e629e8132df390ae692873c847d1c2f62c0James Dong pkres = result; 6761cc31e629e8132df390ae692873c847d1c2f62c0James Dong /* second pixel */ 6771cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 = *p_ref++; 6781cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (r1 + r0); 6791cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 = (r2 + r5); 6801cc31e629e8132df390ae692873c847d1c2f62c0James Dong result -= (r1 * 5);//result -= r1; result -= (r1<<2); 6811cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 = (r3 + r4); 6821cc31e629e8132df390ae692873c847d1c2f62c0James Dong result += (r1 * 20);//result += (r1<<4); result += (r1<<2); 6831cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (result + 512) >> 10; 6841cc31e629e8132df390ae692873c847d1c2f62c0James Dong CLIP_RESULT(result) 6851cc31e629e8132df390ae692873c847d1c2f62c0James Dong pkres |= (result << 8); 6861cc31e629e8132df390ae692873c847d1c2f62c0James Dong /* third pixel */ 6871cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 = *p_ref++; 6881cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (r2 + r1); 6891cc31e629e8132df390ae692873c847d1c2f62c0James Dong r2 = (r3 + r0); 6901cc31e629e8132df390ae692873c847d1c2f62c0James Dong result -= (r2 * 5);//result -= r2; result -= (r2<<2); 6911cc31e629e8132df390ae692873c847d1c2f62c0James Dong r2 = (r4 + r5); 
6921cc31e629e8132df390ae692873c847d1c2f62c0James Dong result += (r2 * 20);//result += (r2<<4); result += (r2<<2); 6931cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (result + 512) >> 10; 6941cc31e629e8132df390ae692873c847d1c2f62c0James Dong CLIP_RESULT(result) 6951cc31e629e8132df390ae692873c847d1c2f62c0James Dong pkres |= (result << 16); 6961cc31e629e8132df390ae692873c847d1c2f62c0James Dong /* fourth pixel */ 6971cc31e629e8132df390ae692873c847d1c2f62c0James Dong r2 = *p_ref++; 6981cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (r3 + r2); 6991cc31e629e8132df390ae692873c847d1c2f62c0James Dong r3 = (r4 + r1); 7001cc31e629e8132df390ae692873c847d1c2f62c0James Dong result -= (r3 * 5);//result -= r3; result -= (r3<<2); 7011cc31e629e8132df390ae692873c847d1c2f62c0James Dong r3 = (r5 + r0); 7021cc31e629e8132df390ae692873c847d1c2f62c0James Dong result += (r3 * 20);//result += (r3<<4); result += (r3<<2); 7031cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (result + 512) >> 10; 7041cc31e629e8132df390ae692873c847d1c2f62c0James Dong CLIP_RESULT(result) 7051cc31e629e8132df390ae692873c847d1c2f62c0James Dong pkres |= (result << 24); 7061cc31e629e8132df390ae692873c847d1c2f62c0James Dong *p_cur++ = pkres; /* write 4 pixels */ 7071cc31e629e8132df390ae692873c847d1c2f62c0James Dong p_ref -= 3; /* offset back to the middle of filter */ 7081cc31e629e8132df390ae692873c847d1c2f62c0James Dong } 7091cc31e629e8132df390ae692873c847d1c2f62c0James Dong p_cur += curr_offset; /* move to the next line */ 7101cc31e629e8132df390ae692873c847d1c2f62c0James Dong p_ref += ref_offset; /* move to the next line */ 7111cc31e629e8132df390ae692873c847d1c2f62c0James Dong } 7121cc31e629e8132df390ae692873c847d1c2f62c0James Dong } 7131cc31e629e8132df390ae692873c847d1c2f62c0James Dong 7141cc31e629e8132df390ae692873c847d1c2f62c0James Dong return ; 7151cc31e629e8132df390ae692873c847d1c2f62c0James Dong} 7161cc31e629e8132df390ae692873c847d1c2f62c0James Dong 
7171cc31e629e8132df390ae692873c847d1c2f62c0James Dongvoid eHorzInterp3MC(uint8 *in, int inpitch, int *out, int outpitch, 7181cc31e629e8132df390ae692873c847d1c2f62c0James Dong int blkwidth, int blkheight) 7191cc31e629e8132df390ae692873c847d1c2f62c0James Dong{ 7201cc31e629e8132df390ae692873c847d1c2f62c0James Dong uint8 *p_ref; 7211cc31e629e8132df390ae692873c847d1c2f62c0James Dong int *p_cur; 7221cc31e629e8132df390ae692873c847d1c2f62c0James Dong uint32 tmp; 7231cc31e629e8132df390ae692873c847d1c2f62c0James Dong int result, curr_offset, ref_offset; 7241cc31e629e8132df390ae692873c847d1c2f62c0James Dong int j, r0, r1, r2, r3, r4, r5; 7251cc31e629e8132df390ae692873c847d1c2f62c0James Dong 7261cc31e629e8132df390ae692873c847d1c2f62c0James Dong p_cur = out; 7271cc31e629e8132df390ae692873c847d1c2f62c0James Dong curr_offset = (outpitch - blkwidth); 7281cc31e629e8132df390ae692873c847d1c2f62c0James Dong p_ref = in; 7291cc31e629e8132df390ae692873c847d1c2f62c0James Dong ref_offset = inpitch - blkwidth; 7301cc31e629e8132df390ae692873c847d1c2f62c0James Dong 7311cc31e629e8132df390ae692873c847d1c2f62c0James Dong for (j = blkheight; j > 0 ; j--) 7321cc31e629e8132df390ae692873c847d1c2f62c0James Dong { 7331cc31e629e8132df390ae692873c847d1c2f62c0James Dong tmp = (uint32)(p_ref + blkwidth); 7341cc31e629e8132df390ae692873c847d1c2f62c0James Dong for (; (uint32)p_ref < tmp;) 7351cc31e629e8132df390ae692873c847d1c2f62c0James Dong { 7361cc31e629e8132df390ae692873c847d1c2f62c0James Dong 7371cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 = p_ref[-2]; 7381cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 = p_ref[-1]; 7391cc31e629e8132df390ae692873c847d1c2f62c0James Dong r2 = *p_ref++; 7401cc31e629e8132df390ae692873c847d1c2f62c0James Dong r3 = *p_ref++; 7411cc31e629e8132df390ae692873c847d1c2f62c0James Dong r4 = *p_ref++; 7421cc31e629e8132df390ae692873c847d1c2f62c0James Dong /* first pixel */ 7431cc31e629e8132df390ae692873c847d1c2f62c0James Dong r5 = *p_ref++; 
7441cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (r0 + r5); 7451cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 = (r1 + r4); 7461cc31e629e8132df390ae692873c847d1c2f62c0James Dong result -= (r0 * 5);//result -= r0; result -= (r0<<2); 7471cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 = (r2 + r3); 7481cc31e629e8132df390ae692873c847d1c2f62c0James Dong result += (r0 * 20);//result += (r0<<4); result += (r0<<2); 7491cc31e629e8132df390ae692873c847d1c2f62c0James Dong *p_cur++ = result; 7501cc31e629e8132df390ae692873c847d1c2f62c0James Dong /* second pixel */ 7511cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 = *p_ref++; 7521cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (r1 + r0); 7531cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 = (r2 + r5); 7541cc31e629e8132df390ae692873c847d1c2f62c0James Dong result -= (r1 * 5);//result -= r1; result -= (r1<<2); 7551cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 = (r3 + r4); 7561cc31e629e8132df390ae692873c847d1c2f62c0James Dong result += (r1 * 20);//result += (r1<<4); result += (r1<<2); 7571cc31e629e8132df390ae692873c847d1c2f62c0James Dong *p_cur++ = result; 7581cc31e629e8132df390ae692873c847d1c2f62c0James Dong /* third pixel */ 7591cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 = *p_ref++; 7601cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (r2 + r1); 7611cc31e629e8132df390ae692873c847d1c2f62c0James Dong r2 = (r3 + r0); 7621cc31e629e8132df390ae692873c847d1c2f62c0James Dong result -= (r2 * 5);//result -= r2; result -= (r2<<2); 7631cc31e629e8132df390ae692873c847d1c2f62c0James Dong r2 = (r4 + r5); 7641cc31e629e8132df390ae692873c847d1c2f62c0James Dong result += (r2 * 20);//result += (r2<<4); result += (r2<<2); 7651cc31e629e8132df390ae692873c847d1c2f62c0James Dong *p_cur++ = result; 7661cc31e629e8132df390ae692873c847d1c2f62c0James Dong /* fourth pixel */ 7671cc31e629e8132df390ae692873c847d1c2f62c0James Dong r2 = *p_ref++; 
7681cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (r3 + r2); 7691cc31e629e8132df390ae692873c847d1c2f62c0James Dong r3 = (r4 + r1); 7701cc31e629e8132df390ae692873c847d1c2f62c0James Dong result -= (r3 * 5);//result -= r3; result -= (r3<<2); 7711cc31e629e8132df390ae692873c847d1c2f62c0James Dong r3 = (r5 + r0); 7721cc31e629e8132df390ae692873c847d1c2f62c0James Dong result += (r3 * 20);//result += (r3<<4); result += (r3<<2); 7731cc31e629e8132df390ae692873c847d1c2f62c0James Dong *p_cur++ = result; 7741cc31e629e8132df390ae692873c847d1c2f62c0James Dong p_ref -= 3; /* move back to the middle of the filter */ 7751cc31e629e8132df390ae692873c847d1c2f62c0James Dong } 7761cc31e629e8132df390ae692873c847d1c2f62c0James Dong p_cur += curr_offset; /* move to the next line */ 7771cc31e629e8132df390ae692873c847d1c2f62c0James Dong p_ref += ref_offset; 7781cc31e629e8132df390ae692873c847d1c2f62c0James Dong } 7791cc31e629e8132df390ae692873c847d1c2f62c0James Dong 7801cc31e629e8132df390ae692873c847d1c2f62c0James Dong return ; 7811cc31e629e8132df390ae692873c847d1c2f62c0James Dong} 7821cc31e629e8132df390ae692873c847d1c2f62c0James Dongvoid eVertInterp1MC(uint8 *in, int inpitch, uint8 *out, int outpitch, 7831cc31e629e8132df390ae692873c847d1c2f62c0James Dong int blkwidth, int blkheight, int dy) 7841cc31e629e8132df390ae692873c847d1c2f62c0James Dong{ 7851cc31e629e8132df390ae692873c847d1c2f62c0James Dong uint8 *p_cur, *p_ref; 7861cc31e629e8132df390ae692873c847d1c2f62c0James Dong uint32 tmp; 7871cc31e629e8132df390ae692873c847d1c2f62c0James Dong int result, curr_offset, ref_offset; 7881cc31e629e8132df390ae692873c847d1c2f62c0James Dong int j, i; 7891cc31e629e8132df390ae692873c847d1c2f62c0James Dong int32 r0, r1, r2, r3, r4, r5, r6, r7, r8, r13; 7901cc31e629e8132df390ae692873c847d1c2f62c0James Dong uint8 tmp_in[24][24]; 7911cc31e629e8132df390ae692873c847d1c2f62c0James Dong 7921cc31e629e8132df390ae692873c847d1c2f62c0James Dong /* not word-aligned */ 
7931cc31e629e8132df390ae692873c847d1c2f62c0James Dong if (((uint32)in)&0x3) 7941cc31e629e8132df390ae692873c847d1c2f62c0James Dong { 7951cc31e629e8132df390ae692873c847d1c2f62c0James Dong eCreateAlign(in, inpitch, -2, &tmp_in[0][0], blkwidth, blkheight + 5); 7961cc31e629e8132df390ae692873c847d1c2f62c0James Dong in = &tmp_in[2][0]; 7971cc31e629e8132df390ae692873c847d1c2f62c0James Dong inpitch = 24; 7981cc31e629e8132df390ae692873c847d1c2f62c0James Dong } 7991cc31e629e8132df390ae692873c847d1c2f62c0James Dong p_cur = out; 8001cc31e629e8132df390ae692873c847d1c2f62c0James Dong curr_offset = 1 - outpitch * (blkheight - 1); /* offset vertically back up and one pixel to right */ 8011cc31e629e8132df390ae692873c847d1c2f62c0James Dong ref_offset = blkheight * inpitch; /* for limit */ 8021cc31e629e8132df390ae692873c847d1c2f62c0James Dong 8031cc31e629e8132df390ae692873c847d1c2f62c0James Dong curr_offset += 3; 8041cc31e629e8132df390ae692873c847d1c2f62c0James Dong 8051cc31e629e8132df390ae692873c847d1c2f62c0James Dong if (dy&1) 8061cc31e629e8132df390ae692873c847d1c2f62c0James Dong { 8071cc31e629e8132df390ae692873c847d1c2f62c0James Dong dy = (dy >> 1) ? 
0 : -inpitch; 8081cc31e629e8132df390ae692873c847d1c2f62c0James Dong 8091cc31e629e8132df390ae692873c847d1c2f62c0James Dong for (j = 0; j < blkwidth; j += 4, in += 4) 8101cc31e629e8132df390ae692873c847d1c2f62c0James Dong { 8111cc31e629e8132df390ae692873c847d1c2f62c0James Dong r13 = 0; 8121cc31e629e8132df390ae692873c847d1c2f62c0James Dong p_ref = in; 8131cc31e629e8132df390ae692873c847d1c2f62c0James Dong p_cur -= outpitch; /* compensate for the first offset */ 8141cc31e629e8132df390ae692873c847d1c2f62c0James Dong tmp = (uint32)(p_ref + ref_offset); /* limit */ 8151cc31e629e8132df390ae692873c847d1c2f62c0James Dong while ((uint32)p_ref < tmp) /* the loop un-rolled */ 8161cc31e629e8132df390ae692873c847d1c2f62c0James Dong { 8171cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 = *((uint32*)(p_ref - (inpitch << 1))); /* load 4 bytes */ 8181cc31e629e8132df390ae692873c847d1c2f62c0James Dong p_ref += inpitch; 8191cc31e629e8132df390ae692873c847d1c2f62c0James Dong r6 = (r0 >> 8) & 0xFF00FF; /* second and fourth byte */ 8201cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 &= 0xFF00FF; 8211cc31e629e8132df390ae692873c847d1c2f62c0James Dong 8221cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 = *((uint32*)(p_ref + (inpitch << 1))); /* r1, r7, ref[3] */ 8231cc31e629e8132df390ae692873c847d1c2f62c0James Dong r7 = (r1 >> 8) & 0xFF00FF; 8241cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 &= 0xFF00FF; 8251cc31e629e8132df390ae692873c847d1c2f62c0James Dong 8261cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 += r1; 8271cc31e629e8132df390ae692873c847d1c2f62c0James Dong r6 += r7; 8281cc31e629e8132df390ae692873c847d1c2f62c0James Dong 8291cc31e629e8132df390ae692873c847d1c2f62c0James Dong r2 = *((uint32*)p_ref); /* r2, r8, ref[1] */ 8301cc31e629e8132df390ae692873c847d1c2f62c0James Dong r8 = (r2 >> 8) & 0xFF00FF; 8311cc31e629e8132df390ae692873c847d1c2f62c0James Dong r2 &= 0xFF00FF; 8321cc31e629e8132df390ae692873c847d1c2f62c0James Dong 
8331cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 = *((uint32*)(p_ref - inpitch)); /* r1, r7, ref[0] */ 8341cc31e629e8132df390ae692873c847d1c2f62c0James Dong r7 = (r1 >> 8) & 0xFF00FF; 8351cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 &= 0xFF00FF; 8361cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 += r2; 8371cc31e629e8132df390ae692873c847d1c2f62c0James Dong 8381cc31e629e8132df390ae692873c847d1c2f62c0James Dong r7 += r8; 8391cc31e629e8132df390ae692873c847d1c2f62c0James Dong 8401cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 += 20 * r1; 8411cc31e629e8132df390ae692873c847d1c2f62c0James Dong r6 += 20 * r7; 8421cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 += 0x100010; 8431cc31e629e8132df390ae692873c847d1c2f62c0James Dong r6 += 0x100010; 8441cc31e629e8132df390ae692873c847d1c2f62c0James Dong 8451cc31e629e8132df390ae692873c847d1c2f62c0James Dong r2 = *((uint32*)(p_ref - (inpitch << 1))); /* r2, r8, ref[-1] */ 8461cc31e629e8132df390ae692873c847d1c2f62c0James Dong r8 = (r2 >> 8) & 0xFF00FF; 8471cc31e629e8132df390ae692873c847d1c2f62c0James Dong r2 &= 0xFF00FF; 8481cc31e629e8132df390ae692873c847d1c2f62c0James Dong 8491cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 = *((uint32*)(p_ref + inpitch)); /* r1, r7, ref[2] */ 8501cc31e629e8132df390ae692873c847d1c2f62c0James Dong r7 = (r1 >> 8) & 0xFF00FF; 8511cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 &= 0xFF00FF; 8521cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 += r2; 8531cc31e629e8132df390ae692873c847d1c2f62c0James Dong 8541cc31e629e8132df390ae692873c847d1c2f62c0James Dong r7 += r8; 8551cc31e629e8132df390ae692873c847d1c2f62c0James Dong 8561cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 -= 5 * r1; 8571cc31e629e8132df390ae692873c847d1c2f62c0James Dong r6 -= 5 * r7; 8581cc31e629e8132df390ae692873c847d1c2f62c0James Dong 8591cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 >>= 5; 8601cc31e629e8132df390ae692873c847d1c2f62c0James Dong r6 >>= 5; 
8611cc31e629e8132df390ae692873c847d1c2f62c0James Dong /* clip */ 8621cc31e629e8132df390ae692873c847d1c2f62c0James Dong r13 |= r6; 8631cc31e629e8132df390ae692873c847d1c2f62c0James Dong r13 |= r0; 8641cc31e629e8132df390ae692873c847d1c2f62c0James Dong //CLIPPACK(r6,result) 8651cc31e629e8132df390ae692873c847d1c2f62c0James Dong 8661cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 = *((uint32*)(p_ref + dy)); 8671cc31e629e8132df390ae692873c847d1c2f62c0James Dong r2 = (r1 >> 8) & 0xFF00FF; 8681cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 &= 0xFF00FF; 8691cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 += r1; 8701cc31e629e8132df390ae692873c847d1c2f62c0James Dong r6 += r2; 8711cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 += 0x10001; 8721cc31e629e8132df390ae692873c847d1c2f62c0James Dong r6 += 0x10001; 8731cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 = (r0 >> 1) & 0xFF00FF; 8741cc31e629e8132df390ae692873c847d1c2f62c0James Dong r6 = (r6 >> 1) & 0xFF00FF; 8751cc31e629e8132df390ae692873c847d1c2f62c0James Dong 8761cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 |= (r6 << 8); /* pack it back */ 8771cc31e629e8132df390ae692873c847d1c2f62c0James Dong *((uint32*)(p_cur += outpitch)) = r0; 8781cc31e629e8132df390ae692873c847d1c2f62c0James Dong } 8791cc31e629e8132df390ae692873c847d1c2f62c0James Dong p_cur += curr_offset; /* offset to the next pixel */ 8801cc31e629e8132df390ae692873c847d1c2f62c0James Dong if (r13 & 0xFF000700) /* this column need clipping */ 8811cc31e629e8132df390ae692873c847d1c2f62c0James Dong { 8821cc31e629e8132df390ae692873c847d1c2f62c0James Dong p_cur -= 4; 8831cc31e629e8132df390ae692873c847d1c2f62c0James Dong for (i = 0; i < 4; i++) 8841cc31e629e8132df390ae692873c847d1c2f62c0James Dong { 8851cc31e629e8132df390ae692873c847d1c2f62c0James Dong p_ref = in + i; 8861cc31e629e8132df390ae692873c847d1c2f62c0James Dong p_cur -= outpitch; /* compensate for the first offset */ 8871cc31e629e8132df390ae692873c847d1c2f62c0James Dong 
8881cc31e629e8132df390ae692873c847d1c2f62c0James Dong tmp = (uint32)(p_ref + ref_offset); /* limit */ 8891cc31e629e8132df390ae692873c847d1c2f62c0James Dong while ((uint32)p_ref < tmp) 8901cc31e629e8132df390ae692873c847d1c2f62c0James Dong { /* loop un-rolled */ 8911cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 = *(p_ref - (inpitch << 1)); 8921cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 = *(p_ref - inpitch); 8931cc31e629e8132df390ae692873c847d1c2f62c0James Dong r2 = *p_ref; 8941cc31e629e8132df390ae692873c847d1c2f62c0James Dong r3 = *(p_ref += inpitch); /* modify pointer before loading */ 8951cc31e629e8132df390ae692873c847d1c2f62c0James Dong r4 = *(p_ref += inpitch); 8961cc31e629e8132df390ae692873c847d1c2f62c0James Dong /* first pixel */ 8971cc31e629e8132df390ae692873c847d1c2f62c0James Dong r5 = *(p_ref += inpitch); 8981cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (r0 + r5); 8991cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 = (r1 + r4); 9001cc31e629e8132df390ae692873c847d1c2f62c0James Dong result -= (r0 * 5);//result -= r0; result -= (r0<<2); 9011cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 = (r2 + r3); 9021cc31e629e8132df390ae692873c847d1c2f62c0James Dong result += (r0 * 20);//result += (r0<<4); result += (r0<<2); 9031cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (result + 16) >> 5; 9041cc31e629e8132df390ae692873c847d1c2f62c0James Dong CLIP_RESULT(result) 9051cc31e629e8132df390ae692873c847d1c2f62c0James Dong /* 3/4 pel, no need to clip */ 9061cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (result + p_ref[dy-(inpitch<<1)] + 1); 9071cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (result >> 1); 9081cc31e629e8132df390ae692873c847d1c2f62c0James Dong *(p_cur += outpitch) = result; 9091cc31e629e8132df390ae692873c847d1c2f62c0James Dong /* second pixel */ 9101cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 = *(p_ref += inpitch); 9111cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = 
(r1 + r0); 9121cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 = (r2 + r5); 9131cc31e629e8132df390ae692873c847d1c2f62c0James Dong result -= (r1 * 5);//result -= r1; result -= (r1<<2); 9141cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 = (r3 + r4); 9151cc31e629e8132df390ae692873c847d1c2f62c0James Dong result += (r1 * 20);//result += (r1<<4); result += (r1<<2); 9161cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (result + 16) >> 5; 9171cc31e629e8132df390ae692873c847d1c2f62c0James Dong CLIP_RESULT(result) 9181cc31e629e8132df390ae692873c847d1c2f62c0James Dong /* 3/4 pel, no need to clip */ 9191cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (result + p_ref[dy-(inpitch<<1)] + 1); 9201cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (result >> 1); 9211cc31e629e8132df390ae692873c847d1c2f62c0James Dong *(p_cur += outpitch) = result; 9221cc31e629e8132df390ae692873c847d1c2f62c0James Dong /* third pixel */ 9231cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 = *(p_ref += inpitch); 9241cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (r2 + r1); 9251cc31e629e8132df390ae692873c847d1c2f62c0James Dong r2 = (r3 + r0); 9261cc31e629e8132df390ae692873c847d1c2f62c0James Dong result -= (r2 * 5);//result -= r2; result -= (r2<<2); 9271cc31e629e8132df390ae692873c847d1c2f62c0James Dong r2 = (r4 + r5); 9281cc31e629e8132df390ae692873c847d1c2f62c0James Dong result += (r2 * 20);//result += (r2<<4); result += (r2<<2); 9291cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (result + 16) >> 5; 9301cc31e629e8132df390ae692873c847d1c2f62c0James Dong CLIP_RESULT(result) 9311cc31e629e8132df390ae692873c847d1c2f62c0James Dong /* 3/4 pel, no need to clip */ 9321cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (result + p_ref[dy-(inpitch<<1)] + 1); 9331cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (result >> 1); 9341cc31e629e8132df390ae692873c847d1c2f62c0James Dong *(p_cur += outpitch) = result; 
9351cc31e629e8132df390ae692873c847d1c2f62c0James Dong /* fourth pixel */ 9361cc31e629e8132df390ae692873c847d1c2f62c0James Dong r2 = *(p_ref += inpitch); 9371cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (r3 + r2); 9381cc31e629e8132df390ae692873c847d1c2f62c0James Dong r3 = (r4 + r1); 9391cc31e629e8132df390ae692873c847d1c2f62c0James Dong result -= (r3 * 5);//result -= r3; result -= (r3<<2); 9401cc31e629e8132df390ae692873c847d1c2f62c0James Dong r3 = (r5 + r0); 9411cc31e629e8132df390ae692873c847d1c2f62c0James Dong result += (r3 * 20);//result += (r3<<4); result += (r3<<2); 9421cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (result + 16) >> 5; 9431cc31e629e8132df390ae692873c847d1c2f62c0James Dong CLIP_RESULT(result) 9441cc31e629e8132df390ae692873c847d1c2f62c0James Dong /* 3/4 pel, no need to clip */ 9451cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (result + p_ref[dy-(inpitch<<1)] + 1); 9461cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (result >> 1); 9471cc31e629e8132df390ae692873c847d1c2f62c0James Dong *(p_cur += outpitch) = result; 9481cc31e629e8132df390ae692873c847d1c2f62c0James Dong p_ref -= (inpitch << 1); /* move back to center of the filter of the next one */ 9491cc31e629e8132df390ae692873c847d1c2f62c0James Dong } 9501cc31e629e8132df390ae692873c847d1c2f62c0James Dong p_cur += (curr_offset - 3); 9511cc31e629e8132df390ae692873c847d1c2f62c0James Dong } 9521cc31e629e8132df390ae692873c847d1c2f62c0James Dong } 9531cc31e629e8132df390ae692873c847d1c2f62c0James Dong } 9541cc31e629e8132df390ae692873c847d1c2f62c0James Dong } 9551cc31e629e8132df390ae692873c847d1c2f62c0James Dong else 9561cc31e629e8132df390ae692873c847d1c2f62c0James Dong { 9571cc31e629e8132df390ae692873c847d1c2f62c0James Dong for (j = 0; j < blkwidth; j += 4, in += 4) 9581cc31e629e8132df390ae692873c847d1c2f62c0James Dong { 9591cc31e629e8132df390ae692873c847d1c2f62c0James Dong r13 = 0; 9601cc31e629e8132df390ae692873c847d1c2f62c0James Dong p_ref = in; 
9611cc31e629e8132df390ae692873c847d1c2f62c0James Dong p_cur -= outpitch; /* compensate for the first offset */ 9621cc31e629e8132df390ae692873c847d1c2f62c0James Dong tmp = (uint32)(p_ref + ref_offset); /* limit */ 9631cc31e629e8132df390ae692873c847d1c2f62c0James Dong while ((uint32)p_ref < tmp) /* the loop un-rolled */ 9641cc31e629e8132df390ae692873c847d1c2f62c0James Dong { 9651cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 = *((uint32*)(p_ref - (inpitch << 1))); /* load 4 bytes */ 9661cc31e629e8132df390ae692873c847d1c2f62c0James Dong p_ref += inpitch; 9671cc31e629e8132df390ae692873c847d1c2f62c0James Dong r6 = (r0 >> 8) & 0xFF00FF; /* second and fourth byte */ 9681cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 &= 0xFF00FF; 9691cc31e629e8132df390ae692873c847d1c2f62c0James Dong 9701cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 = *((uint32*)(p_ref + (inpitch << 1))); /* r1, r7, ref[3] */ 9711cc31e629e8132df390ae692873c847d1c2f62c0James Dong r7 = (r1 >> 8) & 0xFF00FF; 9721cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 &= 0xFF00FF; 9731cc31e629e8132df390ae692873c847d1c2f62c0James Dong 9741cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 += r1; 9751cc31e629e8132df390ae692873c847d1c2f62c0James Dong r6 += r7; 9761cc31e629e8132df390ae692873c847d1c2f62c0James Dong 9771cc31e629e8132df390ae692873c847d1c2f62c0James Dong r2 = *((uint32*)p_ref); /* r2, r8, ref[1] */ 9781cc31e629e8132df390ae692873c847d1c2f62c0James Dong r8 = (r2 >> 8) & 0xFF00FF; 9791cc31e629e8132df390ae692873c847d1c2f62c0James Dong r2 &= 0xFF00FF; 9801cc31e629e8132df390ae692873c847d1c2f62c0James Dong 9811cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 = *((uint32*)(p_ref - inpitch)); /* r1, r7, ref[0] */ 9821cc31e629e8132df390ae692873c847d1c2f62c0James Dong r7 = (r1 >> 8) & 0xFF00FF; 9831cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 &= 0xFF00FF; 9841cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 += r2; 9851cc31e629e8132df390ae692873c847d1c2f62c0James Dong 
9861cc31e629e8132df390ae692873c847d1c2f62c0James Dong r7 += r8; 9871cc31e629e8132df390ae692873c847d1c2f62c0James Dong 9881cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 += 20 * r1; 9891cc31e629e8132df390ae692873c847d1c2f62c0James Dong r6 += 20 * r7; 9901cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 += 0x100010; 9911cc31e629e8132df390ae692873c847d1c2f62c0James Dong r6 += 0x100010; 9921cc31e629e8132df390ae692873c847d1c2f62c0James Dong 9931cc31e629e8132df390ae692873c847d1c2f62c0James Dong r2 = *((uint32*)(p_ref - (inpitch << 1))); /* r2, r8, ref[-1] */ 9941cc31e629e8132df390ae692873c847d1c2f62c0James Dong r8 = (r2 >> 8) & 0xFF00FF; 9951cc31e629e8132df390ae692873c847d1c2f62c0James Dong r2 &= 0xFF00FF; 9961cc31e629e8132df390ae692873c847d1c2f62c0James Dong 9971cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 = *((uint32*)(p_ref + inpitch)); /* r1, r7, ref[2] */ 9981cc31e629e8132df390ae692873c847d1c2f62c0James Dong r7 = (r1 >> 8) & 0xFF00FF; 9991cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 &= 0xFF00FF; 10001cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 += r2; 10011cc31e629e8132df390ae692873c847d1c2f62c0James Dong 10021cc31e629e8132df390ae692873c847d1c2f62c0James Dong r7 += r8; 10031cc31e629e8132df390ae692873c847d1c2f62c0James Dong 10041cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 -= 5 * r1; 10051cc31e629e8132df390ae692873c847d1c2f62c0James Dong r6 -= 5 * r7; 10061cc31e629e8132df390ae692873c847d1c2f62c0James Dong 10071cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 >>= 5; 10081cc31e629e8132df390ae692873c847d1c2f62c0James Dong r6 >>= 5; 10091cc31e629e8132df390ae692873c847d1c2f62c0James Dong /* clip */ 10101cc31e629e8132df390ae692873c847d1c2f62c0James Dong r13 |= r6; 10111cc31e629e8132df390ae692873c847d1c2f62c0James Dong r13 |= r0; 10121cc31e629e8132df390ae692873c847d1c2f62c0James Dong //CLIPPACK(r6,result) 10131cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 &= 0xFF00FF; 10141cc31e629e8132df390ae692873c847d1c2f62c0James Dong 
r6 &= 0xFF00FF; 10151cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 |= (r6 << 8); /* pack it back */ 10161cc31e629e8132df390ae692873c847d1c2f62c0James Dong *((uint32*)(p_cur += outpitch)) = r0; 10171cc31e629e8132df390ae692873c847d1c2f62c0James Dong } 10181cc31e629e8132df390ae692873c847d1c2f62c0James Dong p_cur += curr_offset; /* offset to the next pixel */ 10191cc31e629e8132df390ae692873c847d1c2f62c0James Dong if (r13 & 0xFF000700) /* this column need clipping */ 10201cc31e629e8132df390ae692873c847d1c2f62c0James Dong { 10211cc31e629e8132df390ae692873c847d1c2f62c0James Dong p_cur -= 4; 10221cc31e629e8132df390ae692873c847d1c2f62c0James Dong for (i = 0; i < 4; i++) 10231cc31e629e8132df390ae692873c847d1c2f62c0James Dong { 10241cc31e629e8132df390ae692873c847d1c2f62c0James Dong p_ref = in + i; 10251cc31e629e8132df390ae692873c847d1c2f62c0James Dong p_cur -= outpitch; /* compensate for the first offset */ 10261cc31e629e8132df390ae692873c847d1c2f62c0James Dong tmp = (uint32)(p_ref + ref_offset); /* limit */ 10271cc31e629e8132df390ae692873c847d1c2f62c0James Dong while ((uint32)p_ref < tmp) 10281cc31e629e8132df390ae692873c847d1c2f62c0James Dong { /* loop un-rolled */ 10291cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 = *(p_ref - (inpitch << 1)); 10301cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 = *(p_ref - inpitch); 10311cc31e629e8132df390ae692873c847d1c2f62c0James Dong r2 = *p_ref; 10321cc31e629e8132df390ae692873c847d1c2f62c0James Dong r3 = *(p_ref += inpitch); /* modify pointer before loading */ 10331cc31e629e8132df390ae692873c847d1c2f62c0James Dong r4 = *(p_ref += inpitch); 10341cc31e629e8132df390ae692873c847d1c2f62c0James Dong /* first pixel */ 10351cc31e629e8132df390ae692873c847d1c2f62c0James Dong r5 = *(p_ref += inpitch); 10361cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (r0 + r5); 10371cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 = (r1 + r4); 10381cc31e629e8132df390ae692873c847d1c2f62c0James Dong result -= (r0 * 5);//result -= 
r0; result -= (r0<<2); 10391cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 = (r2 + r3); 10401cc31e629e8132df390ae692873c847d1c2f62c0James Dong result += (r0 * 20);//result += (r0<<4); result += (r0<<2); 10411cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (result + 16) >> 5; 10421cc31e629e8132df390ae692873c847d1c2f62c0James Dong CLIP_RESULT(result) 10431cc31e629e8132df390ae692873c847d1c2f62c0James Dong *(p_cur += outpitch) = result; 10441cc31e629e8132df390ae692873c847d1c2f62c0James Dong /* second pixel */ 10451cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 = *(p_ref += inpitch); 10461cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (r1 + r0); 10471cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 = (r2 + r5); 10481cc31e629e8132df390ae692873c847d1c2f62c0James Dong result -= (r1 * 5);//result -= r1; result -= (r1<<2); 10491cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 = (r3 + r4); 10501cc31e629e8132df390ae692873c847d1c2f62c0James Dong result += (r1 * 20);//result += (r1<<4); result += (r1<<2); 10511cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (result + 16) >> 5; 10521cc31e629e8132df390ae692873c847d1c2f62c0James Dong CLIP_RESULT(result) 10531cc31e629e8132df390ae692873c847d1c2f62c0James Dong *(p_cur += outpitch) = result; 10541cc31e629e8132df390ae692873c847d1c2f62c0James Dong /* third pixel */ 10551cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 = *(p_ref += inpitch); 10561cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (r2 + r1); 10571cc31e629e8132df390ae692873c847d1c2f62c0James Dong r2 = (r3 + r0); 10581cc31e629e8132df390ae692873c847d1c2f62c0James Dong result -= (r2 * 5);//result -= r2; result -= (r2<<2); 10591cc31e629e8132df390ae692873c847d1c2f62c0James Dong r2 = (r4 + r5); 10601cc31e629e8132df390ae692873c847d1c2f62c0James Dong result += (r2 * 20);//result += (r2<<4); result += (r2<<2); 10611cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (result + 16) >> 5; 
/*
 * eVertInterp2MC
 *
 * Runs the six-tap filter (1, -5, 20, 20, -5, 1) down each of `blkwidth`
 * columns of `in` and stores the raw filter sum as an int in `out`.  No
 * rounding offset, shift or clipping is applied here.
 * NOTE(review): presumably this is the first pass of a two-stage half-pel
 * interpolation and a later stage rounds, scales and clips -- confirm
 * against the caller.
 *
 *   in        points at the sample that lines up with out[0]; for every
 *             group of four output rows the filter reads from two rows
 *             above the group to three rows below it.
 *   inpitch   distance, in bytes, between vertically adjacent input samples.
 *   out       destination for the unclipped sums.
 *   outpitch  distance, in ints, between vertically adjacent outputs.
 *   blkwidth  number of columns to filter.
 *   blkheight number of rows to filter.  Rows are produced four at a time,
 *             so a value that is not a multiple of 4 is effectively rounded
 *             up to the next multiple of 4.
 *
 * The end-of-column test compares pointers directly instead of casting them
 * to a 32-bit integer, which would drop the upper address bits on targets
 * with 64-bit pointers.
 */
void eVertInterp2MC(uint8 *in, int inpitch, int *out, int outpitch,
                    int blkwidth, int blkheight)
{
    const int ref_offset = blkheight * inpitch; /* first row past the block */
    int j;

    for (j = 0; j < blkwidth; j++)
    {
        const uint8 *p_ref = in + j;              /* top sample of this column */
        const uint8 *p_end = p_ref + ref_offset;  /* loop limit for this column */
        int *p_col = out + j;                     /* top output of this column */
        int row = 0;                              /* offset of the current row group */

        while (p_ref < p_end)
        {
            /* the nine input rows needed for four consecutive outputs */
            const int m2 = p_ref[-2 * inpitch];
            const int m1 = p_ref[-inpitch];
            const int c0 = p_ref[0];
            const int p1 = p_ref[inpitch];
            const int p2 = p_ref[2 * inpitch];
            const int p3 = p_ref[3 * inpitch];
            const int p4 = p_ref[4 * inpitch];
            const int p5 = p_ref[5 * inpitch];
            const int p6 = p_ref[6 * inpitch];

            /* outer taps weigh 1, the next pair -5, the centre pair 20 */
            p_col[row]                = (m2 + p3) - 5 * (m1 + p2) + 20 * (c0 + p1);
            p_col[row + outpitch]     = (m1 + p4) - 5 * (c0 + p3) + 20 * (p1 + p2);
            p_col[row + 2 * outpitch] = (c0 + p5) - 5 * (p1 + p4) + 20 * (p2 + p3);
            p_col[row + 3 * outpitch] = (p1 + p6) - 5 * (p2 + p5) + 20 * (p3 + p4);

            row += 4 * outpitch;
            p_ref += 4 * inpitch;
        }
    }

    return;
}
11601cc31e629e8132df390ae692873c847d1c2f62c0James Dong p_cur = out; 11611cc31e629e8132df390ae692873c847d1c2f62c0James Dong curr_offset = 1 - outpitch * (blkheight - 1); /* offset vertically back up and one pixel to right */ 11621cc31e629e8132df390ae692873c847d1c2f62c0James Dong ref_offset = blkheight * inpitch; /* for limit */ 11631cc31e629e8132df390ae692873c847d1c2f62c0James Dong 11641cc31e629e8132df390ae692873c847d1c2f62c0James Dong if (dy&1) 11651cc31e629e8132df390ae692873c847d1c2f62c0James Dong { 11661cc31e629e8132df390ae692873c847d1c2f62c0James Dong dy = (dy >> 1) ? -(inpitch << 1) : -(inpitch << 1) - inpitch; 11671cc31e629e8132df390ae692873c847d1c2f62c0James Dong 11681cc31e629e8132df390ae692873c847d1c2f62c0James Dong for (j = 0; j < blkwidth; j++) 11691cc31e629e8132df390ae692873c847d1c2f62c0James Dong { 11701cc31e629e8132df390ae692873c847d1c2f62c0James Dong p_cur -= outpitch; /* compensate for the first offset */ 11711cc31e629e8132df390ae692873c847d1c2f62c0James Dong p_ref = in++; 11721cc31e629e8132df390ae692873c847d1c2f62c0James Dong 11731cc31e629e8132df390ae692873c847d1c2f62c0James Dong tmp = (uint32)(p_ref + ref_offset); /* limit */ 11741cc31e629e8132df390ae692873c847d1c2f62c0James Dong while ((uint32)p_ref < tmp) 11751cc31e629e8132df390ae692873c847d1c2f62c0James Dong { /* loop un-rolled */ 11761cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 = *(p_ref - (inpitch << 1)); 11771cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 = *(p_ref - inpitch); 11781cc31e629e8132df390ae692873c847d1c2f62c0James Dong r2 = *p_ref; 11791cc31e629e8132df390ae692873c847d1c2f62c0James Dong r3 = *(p_ref += inpitch); /* modify pointer before loading */ 11801cc31e629e8132df390ae692873c847d1c2f62c0James Dong r4 = *(p_ref += inpitch); 11811cc31e629e8132df390ae692873c847d1c2f62c0James Dong /* first pixel */ 11821cc31e629e8132df390ae692873c847d1c2f62c0James Dong r5 = *(p_ref += inpitch); 11831cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (r0 + r5); 
11841cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 = (r1 + r4); 11851cc31e629e8132df390ae692873c847d1c2f62c0James Dong result -= (r0 * 5);//result -= r0; result -= (r0<<2); 11861cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 = (r2 + r3); 11871cc31e629e8132df390ae692873c847d1c2f62c0James Dong result += (r0 * 20);//result += (r0<<4); result += (r0<<2); 11881cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (result + 512) >> 10; 11891cc31e629e8132df390ae692873c847d1c2f62c0James Dong CLIP_RESULT(result) 11901cc31e629e8132df390ae692873c847d1c2f62c0James Dong result2 = ((p_ref[dy] + 16) >> 5); 11911cc31e629e8132df390ae692873c847d1c2f62c0James Dong CLIP_RESULT(result2) 11921cc31e629e8132df390ae692873c847d1c2f62c0James Dong /* 3/4 pel, no need to clip */ 11931cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (result + result2 + 1); 11941cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (result >> 1); 11951cc31e629e8132df390ae692873c847d1c2f62c0James Dong *(p_cur += outpitch) = result; 11961cc31e629e8132df390ae692873c847d1c2f62c0James Dong /* second pixel */ 11971cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 = *(p_ref += inpitch); 11981cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (r1 + r0); 11991cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 = (r2 + r5); 12001cc31e629e8132df390ae692873c847d1c2f62c0James Dong result -= (r1 * 5);//result -= r1; result -= (r1<<2); 12011cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 = (r3 + r4); 12021cc31e629e8132df390ae692873c847d1c2f62c0James Dong result += (r1 * 20);//result += (r1<<4); result += (r1<<2); 12031cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (result + 512) >> 10; 12041cc31e629e8132df390ae692873c847d1c2f62c0James Dong CLIP_RESULT(result) 12051cc31e629e8132df390ae692873c847d1c2f62c0James Dong result2 = ((p_ref[dy] + 16) >> 5); 12061cc31e629e8132df390ae692873c847d1c2f62c0James Dong CLIP_RESULT(result2) 
12071cc31e629e8132df390ae692873c847d1c2f62c0James Dong /* 3/4 pel, no need to clip */ 12081cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (result + result2 + 1); 12091cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (result >> 1); 12101cc31e629e8132df390ae692873c847d1c2f62c0James Dong *(p_cur += outpitch) = result; 12111cc31e629e8132df390ae692873c847d1c2f62c0James Dong /* third pixel */ 12121cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 = *(p_ref += inpitch); 12131cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (r2 + r1); 12141cc31e629e8132df390ae692873c847d1c2f62c0James Dong r2 = (r3 + r0); 12151cc31e629e8132df390ae692873c847d1c2f62c0James Dong result -= (r2 * 5);//result -= r2; result -= (r2<<2); 12161cc31e629e8132df390ae692873c847d1c2f62c0James Dong r2 = (r4 + r5); 12171cc31e629e8132df390ae692873c847d1c2f62c0James Dong result += (r2 * 20);//result += (r2<<4); result += (r2<<2); 12181cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (result + 512) >> 10; 12191cc31e629e8132df390ae692873c847d1c2f62c0James Dong CLIP_RESULT(result) 12201cc31e629e8132df390ae692873c847d1c2f62c0James Dong result2 = ((p_ref[dy] + 16) >> 5); 12211cc31e629e8132df390ae692873c847d1c2f62c0James Dong CLIP_RESULT(result2) 12221cc31e629e8132df390ae692873c847d1c2f62c0James Dong /* 3/4 pel, no need to clip */ 12231cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (result + result2 + 1); 12241cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (result >> 1); 12251cc31e629e8132df390ae692873c847d1c2f62c0James Dong *(p_cur += outpitch) = result; 12261cc31e629e8132df390ae692873c847d1c2f62c0James Dong /* fourth pixel */ 12271cc31e629e8132df390ae692873c847d1c2f62c0James Dong r2 = *(p_ref += inpitch); 12281cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (r3 + r2); 12291cc31e629e8132df390ae692873c847d1c2f62c0James Dong r3 = (r4 + r1); 12301cc31e629e8132df390ae692873c847d1c2f62c0James Dong result -= (r3 * 5);//result -= r3; result 
-= (r3<<2); 12311cc31e629e8132df390ae692873c847d1c2f62c0James Dong r3 = (r5 + r0); 12321cc31e629e8132df390ae692873c847d1c2f62c0James Dong result += (r3 * 20);//result += (r3<<4); result += (r3<<2); 12331cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (result + 512) >> 10; 12341cc31e629e8132df390ae692873c847d1c2f62c0James Dong CLIP_RESULT(result) 12351cc31e629e8132df390ae692873c847d1c2f62c0James Dong result2 = ((p_ref[dy] + 16) >> 5); 12361cc31e629e8132df390ae692873c847d1c2f62c0James Dong CLIP_RESULT(result2) 12371cc31e629e8132df390ae692873c847d1c2f62c0James Dong /* 3/4 pel, no need to clip */ 12381cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (result + result2 + 1); 12391cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (result >> 1); 12401cc31e629e8132df390ae692873c847d1c2f62c0James Dong *(p_cur += outpitch) = result; 12411cc31e629e8132df390ae692873c847d1c2f62c0James Dong p_ref -= (inpitch << 1); /* move back to center of the filter of the next one */ 12421cc31e629e8132df390ae692873c847d1c2f62c0James Dong } 12431cc31e629e8132df390ae692873c847d1c2f62c0James Dong p_cur += curr_offset; 12441cc31e629e8132df390ae692873c847d1c2f62c0James Dong } 12451cc31e629e8132df390ae692873c847d1c2f62c0James Dong } 12461cc31e629e8132df390ae692873c847d1c2f62c0James Dong else 12471cc31e629e8132df390ae692873c847d1c2f62c0James Dong { 12481cc31e629e8132df390ae692873c847d1c2f62c0James Dong for (j = 0; j < blkwidth; j++) 12491cc31e629e8132df390ae692873c847d1c2f62c0James Dong { 12501cc31e629e8132df390ae692873c847d1c2f62c0James Dong p_cur -= outpitch; /* compensate for the first offset */ 12511cc31e629e8132df390ae692873c847d1c2f62c0James Dong p_ref = in++; 12521cc31e629e8132df390ae692873c847d1c2f62c0James Dong 12531cc31e629e8132df390ae692873c847d1c2f62c0James Dong tmp = (uint32)(p_ref + ref_offset); /* limit */ 12541cc31e629e8132df390ae692873c847d1c2f62c0James Dong while ((uint32)p_ref < tmp) 12551cc31e629e8132df390ae692873c847d1c2f62c0James Dong { /* loop 
un-rolled */ 12561cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 = *(p_ref - (inpitch << 1)); 12571cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 = *(p_ref - inpitch); 12581cc31e629e8132df390ae692873c847d1c2f62c0James Dong r2 = *p_ref; 12591cc31e629e8132df390ae692873c847d1c2f62c0James Dong r3 = *(p_ref += inpitch); /* modify pointer before loading */ 12601cc31e629e8132df390ae692873c847d1c2f62c0James Dong r4 = *(p_ref += inpitch); 12611cc31e629e8132df390ae692873c847d1c2f62c0James Dong /* first pixel */ 12621cc31e629e8132df390ae692873c847d1c2f62c0James Dong r5 = *(p_ref += inpitch); 12631cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (r0 + r5); 12641cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 = (r1 + r4); 12651cc31e629e8132df390ae692873c847d1c2f62c0James Dong result -= (r0 * 5);//result -= r0; result -= (r0<<2); 12661cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 = (r2 + r3); 12671cc31e629e8132df390ae692873c847d1c2f62c0James Dong result += (r0 * 20);//result += (r0<<4); result += (r0<<2); 12681cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (result + 512) >> 10; 12691cc31e629e8132df390ae692873c847d1c2f62c0James Dong CLIP_RESULT(result) 12701cc31e629e8132df390ae692873c847d1c2f62c0James Dong *(p_cur += outpitch) = result; 12711cc31e629e8132df390ae692873c847d1c2f62c0James Dong /* second pixel */ 12721cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 = *(p_ref += inpitch); 12731cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (r1 + r0); 12741cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 = (r2 + r5); 12751cc31e629e8132df390ae692873c847d1c2f62c0James Dong result -= (r1 * 5);//result -= r1; result -= (r1<<2); 12761cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 = (r3 + r4); 12771cc31e629e8132df390ae692873c847d1c2f62c0James Dong result += (r1 * 20);//result += (r1<<4); result += (r1<<2); 12781cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (result + 512) >> 10; 
12791cc31e629e8132df390ae692873c847d1c2f62c0James Dong CLIP_RESULT(result) 12801cc31e629e8132df390ae692873c847d1c2f62c0James Dong *(p_cur += outpitch) = result; 12811cc31e629e8132df390ae692873c847d1c2f62c0James Dong /* third pixel */ 12821cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 = *(p_ref += inpitch); 12831cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (r2 + r1); 12841cc31e629e8132df390ae692873c847d1c2f62c0James Dong r2 = (r3 + r0); 12851cc31e629e8132df390ae692873c847d1c2f62c0James Dong result -= (r2 * 5);//result -= r2; result -= (r2<<2); 12861cc31e629e8132df390ae692873c847d1c2f62c0James Dong r2 = (r4 + r5); 12871cc31e629e8132df390ae692873c847d1c2f62c0James Dong result += (r2 * 20);//result += (r2<<4); result += (r2<<2); 12881cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (result + 512) >> 10; 12891cc31e629e8132df390ae692873c847d1c2f62c0James Dong CLIP_RESULT(result) 12901cc31e629e8132df390ae692873c847d1c2f62c0James Dong *(p_cur += outpitch) = result; 12911cc31e629e8132df390ae692873c847d1c2f62c0James Dong /* fourth pixel */ 12921cc31e629e8132df390ae692873c847d1c2f62c0James Dong r2 = *(p_ref += inpitch); 12931cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (r3 + r2); 12941cc31e629e8132df390ae692873c847d1c2f62c0James Dong r3 = (r4 + r1); 12951cc31e629e8132df390ae692873c847d1c2f62c0James Dong result -= (r3 * 5);//result -= r3; result -= (r3<<2); 12961cc31e629e8132df390ae692873c847d1c2f62c0James Dong r3 = (r5 + r0); 12971cc31e629e8132df390ae692873c847d1c2f62c0James Dong result += (r3 * 20);//result += (r3<<4); result += (r3<<2); 12981cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (result + 512) >> 10; 12991cc31e629e8132df390ae692873c847d1c2f62c0James Dong CLIP_RESULT(result) 13001cc31e629e8132df390ae692873c847d1c2f62c0James Dong *(p_cur += outpitch) = result; 13011cc31e629e8132df390ae692873c847d1c2f62c0James Dong p_ref -= (inpitch << 1); /* move back to center of the filter of the next one */ 
13021cc31e629e8132df390ae692873c847d1c2f62c0James Dong } 13031cc31e629e8132df390ae692873c847d1c2f62c0James Dong p_cur += curr_offset; 13041cc31e629e8132df390ae692873c847d1c2f62c0James Dong } 13051cc31e629e8132df390ae692873c847d1c2f62c0James Dong } 13061cc31e629e8132df390ae692873c847d1c2f62c0James Dong 13071cc31e629e8132df390ae692873c847d1c2f62c0James Dong return ; 13081cc31e629e8132df390ae692873c847d1c2f62c0James Dong} 13091cc31e629e8132df390ae692873c847d1c2f62c0James Dong 13101cc31e629e8132df390ae692873c847d1c2f62c0James Dongvoid eDiagonalInterpMC(uint8 *in1, uint8 *in2, int inpitch, 13111cc31e629e8132df390ae692873c847d1c2f62c0James Dong uint8 *out, int outpitch, 13121cc31e629e8132df390ae692873c847d1c2f62c0James Dong int blkwidth, int blkheight) 13131cc31e629e8132df390ae692873c847d1c2f62c0James Dong{ 13141cc31e629e8132df390ae692873c847d1c2f62c0James Dong int j, i; 13151cc31e629e8132df390ae692873c847d1c2f62c0James Dong int result; 13161cc31e629e8132df390ae692873c847d1c2f62c0James Dong uint8 *p_cur, *p_ref, *p_tmp8; 13171cc31e629e8132df390ae692873c847d1c2f62c0James Dong int curr_offset, ref_offset; 13181cc31e629e8132df390ae692873c847d1c2f62c0James Dong uint8 tmp_res[24][24], tmp_in[24][24]; 13191cc31e629e8132df390ae692873c847d1c2f62c0James Dong uint32 *p_tmp; 13201cc31e629e8132df390ae692873c847d1c2f62c0James Dong uint32 tmp, pkres, tmp_result; 13211cc31e629e8132df390ae692873c847d1c2f62c0James Dong int32 r0, r1, r2, r3, r4, r5; 13221cc31e629e8132df390ae692873c847d1c2f62c0James Dong int32 r6, r7, r8, r9, r10, r13; 13231cc31e629e8132df390ae692873c847d1c2f62c0James Dong 13241cc31e629e8132df390ae692873c847d1c2f62c0James Dong ref_offset = inpitch - blkwidth; 13251cc31e629e8132df390ae692873c847d1c2f62c0James Dong p_ref = in1 - 2; 13261cc31e629e8132df390ae692873c847d1c2f62c0James Dong /* perform horizontal interpolation */ 13271cc31e629e8132df390ae692873c847d1c2f62c0James Dong /* not word-aligned */ 13281cc31e629e8132df390ae692873c847d1c2f62c0James Dong /* It is faster to 
read 1 byte at time to avoid calling CreateAlign */ 13291cc31e629e8132df390ae692873c847d1c2f62c0James Dong /* if(((uint32)p_ref)&0x3) 13301cc31e629e8132df390ae692873c847d1c2f62c0James Dong { 13311cc31e629e8132df390ae692873c847d1c2f62c0James Dong CreateAlign(p_ref,inpitch,0,&tmp_in[0][0],blkwidth+8,blkheight); 13321cc31e629e8132df390ae692873c847d1c2f62c0James Dong p_ref = &tmp_in[0][0]; 13331cc31e629e8132df390ae692873c847d1c2f62c0James Dong ref_offset = 24-blkwidth; 13341cc31e629e8132df390ae692873c847d1c2f62c0James Dong }*/ 13351cc31e629e8132df390ae692873c847d1c2f62c0James Dong 13361cc31e629e8132df390ae692873c847d1c2f62c0James Dong p_tmp = (uint32*) & (tmp_res[0][0]); 13371cc31e629e8132df390ae692873c847d1c2f62c0James Dong for (j = blkheight; j > 0; j--) 13381cc31e629e8132df390ae692873c847d1c2f62c0James Dong { 13391cc31e629e8132df390ae692873c847d1c2f62c0James Dong r13 = 0; 13401cc31e629e8132df390ae692873c847d1c2f62c0James Dong tmp = (uint32)(p_ref + blkwidth); 13411cc31e629e8132df390ae692873c847d1c2f62c0James Dong 13421cc31e629e8132df390ae692873c847d1c2f62c0James Dong //r0 = *((uint32*)p_ref); /* d,c,b,a */ 13431cc31e629e8132df390ae692873c847d1c2f62c0James Dong //r1 = (r0>>8)&0xFF00FF; /* 0,d,0,b */ 13441cc31e629e8132df390ae692873c847d1c2f62c0James Dong //r0 &= 0xFF00FF; /* 0,c,0,a */ 13451cc31e629e8132df390ae692873c847d1c2f62c0James Dong /* It is faster to read 1 byte at a time */ 13461cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 = p_ref[0]; 13471cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 = p_ref[2]; 13481cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 |= (r1 << 16); /* 0,c,0,a */ 13491cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 = p_ref[1]; 13501cc31e629e8132df390ae692873c847d1c2f62c0James Dong r2 = p_ref[3]; 13511cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 |= (r2 << 16); /* 0,d,0,b */ 13521cc31e629e8132df390ae692873c847d1c2f62c0James Dong 13531cc31e629e8132df390ae692873c847d1c2f62c0James Dong while ((uint32)p_ref < tmp) 
13541cc31e629e8132df390ae692873c847d1c2f62c0James Dong { 13551cc31e629e8132df390ae692873c847d1c2f62c0James Dong //r2 = *((uint32*)(p_ref+=4));/* h,g,f,e */ 13561cc31e629e8132df390ae692873c847d1c2f62c0James Dong //r3 = (r2>>8)&0xFF00FF; /* 0,h,0,f */ 13571cc31e629e8132df390ae692873c847d1c2f62c0James Dong //r2 &= 0xFF00FF; /* 0,g,0,e */ 13581cc31e629e8132df390ae692873c847d1c2f62c0James Dong /* It is faster to read 1 byte at a time */ 13591cc31e629e8132df390ae692873c847d1c2f62c0James Dong r2 = *(p_ref += 4); 13601cc31e629e8132df390ae692873c847d1c2f62c0James Dong r3 = p_ref[2]; 13611cc31e629e8132df390ae692873c847d1c2f62c0James Dong r2 |= (r3 << 16); /* 0,g,0,e */ 13621cc31e629e8132df390ae692873c847d1c2f62c0James Dong r3 = p_ref[1]; 13631cc31e629e8132df390ae692873c847d1c2f62c0James Dong r4 = p_ref[3]; 13641cc31e629e8132df390ae692873c847d1c2f62c0James Dong r3 |= (r4 << 16); /* 0,h,0,f */ 13651cc31e629e8132df390ae692873c847d1c2f62c0James Dong 13661cc31e629e8132df390ae692873c847d1c2f62c0James Dong r4 = r0 + r3; /* c+h, a+f */ 13671cc31e629e8132df390ae692873c847d1c2f62c0James Dong r5 = r0 + r1; /* c+d, a+b */ 13681cc31e629e8132df390ae692873c847d1c2f62c0James Dong r6 = r2 + r3; /* g+h, e+f */ 13691cc31e629e8132df390ae692873c847d1c2f62c0James Dong r5 >>= 16; 13701cc31e629e8132df390ae692873c847d1c2f62c0James Dong r5 |= (r6 << 16); /* e+f, c+d */ 13711cc31e629e8132df390ae692873c847d1c2f62c0James Dong r4 += r5 * 20; /* c+20*e+20*f+h, a+20*c+20*d+f */ 13721cc31e629e8132df390ae692873c847d1c2f62c0James Dong r4 += 0x100010; /* +16, +16 */ 13731cc31e629e8132df390ae692873c847d1c2f62c0James Dong r5 = r1 + r2; /* d+g, b+e */ 13741cc31e629e8132df390ae692873c847d1c2f62c0James Dong r4 -= r5 * 5; /* c-5*d+20*e+20*f-5*g+h, a-5*b+20*c+20*d-5*e+f */ 13751cc31e629e8132df390ae692873c847d1c2f62c0James Dong r4 >>= 5; 13761cc31e629e8132df390ae692873c847d1c2f62c0James Dong r13 |= r4; /* check clipping */ 13771cc31e629e8132df390ae692873c847d1c2f62c0James Dong r4 &= 0xFF00FF; /* mask */ 
13781cc31e629e8132df390ae692873c847d1c2f62c0James Dong 13791cc31e629e8132df390ae692873c847d1c2f62c0James Dong r5 = p_ref[4]; /* i */ 13801cc31e629e8132df390ae692873c847d1c2f62c0James Dong r6 = (r5 << 16); 13811cc31e629e8132df390ae692873c847d1c2f62c0James Dong r5 = r6 | (r2 >> 16);/* 0,i,0,g */ 13821cc31e629e8132df390ae692873c847d1c2f62c0James Dong r5 += r1; /* d+i, b+g */ /* r5 not free */ 13831cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 >>= 16; 13841cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 |= (r3 << 16); /* 0,f,0,d */ /* r1 has changed */ 13851cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 += r2; /* f+g, d+e */ 13861cc31e629e8132df390ae692873c847d1c2f62c0James Dong r5 += 20 * r1; /* d+20f+20g+i, b+20d+20e+g */ 13871cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 >>= 16; 13881cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 |= (r2 << 16); /* 0,e,0,c */ /* r0 has changed */ 13891cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 += r3; /* e+h, c+f */ 13901cc31e629e8132df390ae692873c847d1c2f62c0James Dong r5 += 0x100010; /* 16,16 */ 13911cc31e629e8132df390ae692873c847d1c2f62c0James Dong r5 -= r0 * 5; /* d-5e+20f+20g-5h+i, b-5c+20d+20e-5f+g */ 13921cc31e629e8132df390ae692873c847d1c2f62c0James Dong r5 >>= 5; 13931cc31e629e8132df390ae692873c847d1c2f62c0James Dong r13 |= r5; /* check clipping */ 13941cc31e629e8132df390ae692873c847d1c2f62c0James Dong r5 &= 0xFF00FF; /* mask */ 13951cc31e629e8132df390ae692873c847d1c2f62c0James Dong 13961cc31e629e8132df390ae692873c847d1c2f62c0James Dong r4 |= (r5 << 8); /* pack them together */ 13971cc31e629e8132df390ae692873c847d1c2f62c0James Dong *p_tmp++ = r4; 13981cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 = r3; 13991cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 = r2; 14001cc31e629e8132df390ae692873c847d1c2f62c0James Dong } 14011cc31e629e8132df390ae692873c847d1c2f62c0James Dong p_tmp += ((24 - blkwidth) >> 2); /* move to the next line */ 
14021cc31e629e8132df390ae692873c847d1c2f62c0James Dong p_ref += ref_offset; /* ref_offset = inpitch-blkwidth; */ 14031cc31e629e8132df390ae692873c847d1c2f62c0James Dong 14041cc31e629e8132df390ae692873c847d1c2f62c0James Dong if (r13&0xFF000700) /* need clipping */ 14051cc31e629e8132df390ae692873c847d1c2f62c0James Dong { 14061cc31e629e8132df390ae692873c847d1c2f62c0James Dong /* move back to the beginning of the line */ 14071cc31e629e8132df390ae692873c847d1c2f62c0James Dong p_ref -= (ref_offset + blkwidth); /* input */ 14081cc31e629e8132df390ae692873c847d1c2f62c0James Dong p_tmp -= 6; /* intermediate output */ 14091cc31e629e8132df390ae692873c847d1c2f62c0James Dong tmp = (uint32)(p_ref + blkwidth); 14101cc31e629e8132df390ae692873c847d1c2f62c0James Dong while ((uint32)p_ref < tmp) 14111cc31e629e8132df390ae692873c847d1c2f62c0James Dong { 14121cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 = *p_ref++; 14131cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 = *p_ref++; 14141cc31e629e8132df390ae692873c847d1c2f62c0James Dong r2 = *p_ref++; 14151cc31e629e8132df390ae692873c847d1c2f62c0James Dong r3 = *p_ref++; 14161cc31e629e8132df390ae692873c847d1c2f62c0James Dong r4 = *p_ref++; 14171cc31e629e8132df390ae692873c847d1c2f62c0James Dong /* first pixel */ 14181cc31e629e8132df390ae692873c847d1c2f62c0James Dong r5 = *p_ref++; 14191cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (r0 + r5); 14201cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 = (r1 + r4); 14211cc31e629e8132df390ae692873c847d1c2f62c0James Dong result -= (r0 * 5);//result -= r0; result -= (r0<<2); 14221cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 = (r2 + r3); 14231cc31e629e8132df390ae692873c847d1c2f62c0James Dong result += (r0 * 20);//result += (r0<<4); result += (r0<<2); 14241cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (result + 16) >> 5; 14251cc31e629e8132df390ae692873c847d1c2f62c0James Dong CLIP_RESULT(result) 14261cc31e629e8132df390ae692873c847d1c2f62c0James Dong pkres = 
result; 14271cc31e629e8132df390ae692873c847d1c2f62c0James Dong /* second pixel */ 14281cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 = *p_ref++; 14291cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (r1 + r0); 14301cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 = (r2 + r5); 14311cc31e629e8132df390ae692873c847d1c2f62c0James Dong result -= (r1 * 5);//result -= r1; result -= (r1<<2); 14321cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 = (r3 + r4); 14331cc31e629e8132df390ae692873c847d1c2f62c0James Dong result += (r1 * 20);//result += (r1<<4); result += (r1<<2); 14341cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (result + 16) >> 5; 14351cc31e629e8132df390ae692873c847d1c2f62c0James Dong CLIP_RESULT(result) 14361cc31e629e8132df390ae692873c847d1c2f62c0James Dong pkres |= (result << 8); 14371cc31e629e8132df390ae692873c847d1c2f62c0James Dong /* third pixel */ 14381cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 = *p_ref++; 14391cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (r2 + r1); 14401cc31e629e8132df390ae692873c847d1c2f62c0James Dong r2 = (r3 + r0); 14411cc31e629e8132df390ae692873c847d1c2f62c0James Dong result -= (r2 * 5);//result -= r2; result -= (r2<<2); 14421cc31e629e8132df390ae692873c847d1c2f62c0James Dong r2 = (r4 + r5); 14431cc31e629e8132df390ae692873c847d1c2f62c0James Dong result += (r2 * 20);//result += (r2<<4); result += (r2<<2); 14441cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (result + 16) >> 5; 14451cc31e629e8132df390ae692873c847d1c2f62c0James Dong CLIP_RESULT(result) 14461cc31e629e8132df390ae692873c847d1c2f62c0James Dong pkres |= (result << 16); 14471cc31e629e8132df390ae692873c847d1c2f62c0James Dong /* fourth pixel */ 14481cc31e629e8132df390ae692873c847d1c2f62c0James Dong r2 = *p_ref++; 14491cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (r3 + r2); 14501cc31e629e8132df390ae692873c847d1c2f62c0James Dong r3 = (r4 + r1); 14511cc31e629e8132df390ae692873c847d1c2f62c0James Dong 
result -= (r3 * 5);//result -= r3; result -= (r3<<2); 14521cc31e629e8132df390ae692873c847d1c2f62c0James Dong r3 = (r5 + r0); 14531cc31e629e8132df390ae692873c847d1c2f62c0James Dong result += (r3 * 20);//result += (r3<<4); result += (r3<<2); 14541cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (result + 16) >> 5; 14551cc31e629e8132df390ae692873c847d1c2f62c0James Dong CLIP_RESULT(result) 14561cc31e629e8132df390ae692873c847d1c2f62c0James Dong pkres |= (result << 24); 14571cc31e629e8132df390ae692873c847d1c2f62c0James Dong 14581cc31e629e8132df390ae692873c847d1c2f62c0James Dong *p_tmp++ = pkres; /* write 4 pixel */ 14591cc31e629e8132df390ae692873c847d1c2f62c0James Dong p_ref -= 5; 14601cc31e629e8132df390ae692873c847d1c2f62c0James Dong } 14611cc31e629e8132df390ae692873c847d1c2f62c0James Dong p_tmp += ((24 - blkwidth) >> 2); /* move to the next line */ 14621cc31e629e8132df390ae692873c847d1c2f62c0James Dong p_ref += ref_offset; /* ref_offset = inpitch-blkwidth; */ 14631cc31e629e8132df390ae692873c847d1c2f62c0James Dong } 14641cc31e629e8132df390ae692873c847d1c2f62c0James Dong } 14651cc31e629e8132df390ae692873c847d1c2f62c0James Dong 14661cc31e629e8132df390ae692873c847d1c2f62c0James Dong /* perform vertical interpolation */ 14671cc31e629e8132df390ae692873c847d1c2f62c0James Dong /* not word-aligned */ 14681cc31e629e8132df390ae692873c847d1c2f62c0James Dong if (((uint32)in2)&0x3) 14691cc31e629e8132df390ae692873c847d1c2f62c0James Dong { 14701cc31e629e8132df390ae692873c847d1c2f62c0James Dong eCreateAlign(in2, inpitch, -2, &tmp_in[0][0], blkwidth, blkheight + 5); 14711cc31e629e8132df390ae692873c847d1c2f62c0James Dong in2 = &tmp_in[2][0]; 14721cc31e629e8132df390ae692873c847d1c2f62c0James Dong inpitch = 24; 14731cc31e629e8132df390ae692873c847d1c2f62c0James Dong } 14741cc31e629e8132df390ae692873c847d1c2f62c0James Dong 14751cc31e629e8132df390ae692873c847d1c2f62c0James Dong p_cur = out; 14761cc31e629e8132df390ae692873c847d1c2f62c0James Dong curr_offset = 1 - outpitch * 
(blkheight - 1); /* offset vertically up and one pixel right */ 14771cc31e629e8132df390ae692873c847d1c2f62c0James Dong pkres = blkheight * inpitch; /* reuse it for limit */ 14781cc31e629e8132df390ae692873c847d1c2f62c0James Dong 14791cc31e629e8132df390ae692873c847d1c2f62c0James Dong curr_offset += 3; 14801cc31e629e8132df390ae692873c847d1c2f62c0James Dong 14811cc31e629e8132df390ae692873c847d1c2f62c0James Dong for (j = 0; j < blkwidth; j += 4, in2 += 4) 14821cc31e629e8132df390ae692873c847d1c2f62c0James Dong { 14831cc31e629e8132df390ae692873c847d1c2f62c0James Dong r13 = 0; 14841cc31e629e8132df390ae692873c847d1c2f62c0James Dong p_ref = in2; 14851cc31e629e8132df390ae692873c847d1c2f62c0James Dong p_tmp8 = &(tmp_res[0][j]); /* intermediate result */ 14861cc31e629e8132df390ae692873c847d1c2f62c0James Dong p_tmp8 -= 24; /* compensate for the first offset */ 14871cc31e629e8132df390ae692873c847d1c2f62c0James Dong p_cur -= outpitch; /* compensate for the first offset */ 14881cc31e629e8132df390ae692873c847d1c2f62c0James Dong tmp = (uint32)(p_ref + pkres); /* limit */ 14891cc31e629e8132df390ae692873c847d1c2f62c0James Dong while ((uint32)p_ref < tmp) /* the loop un-rolled */ 14901cc31e629e8132df390ae692873c847d1c2f62c0James Dong { 14911cc31e629e8132df390ae692873c847d1c2f62c0James Dong /* Read 1 byte at a time is too slow, too many read and pack ops, need to call CreateAlign */ 14921cc31e629e8132df390ae692873c847d1c2f62c0James Dong /*p_ref8 = p_ref-(inpitch<<1); r0 = p_ref8[0]; r1 = p_ref8[2]; 14931cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 |= (r1<<16); r6 = p_ref8[1]; r1 = p_ref8[3]; 14941cc31e629e8132df390ae692873c847d1c2f62c0James Dong r6 |= (r1<<16); p_ref+=inpitch; */ 14951cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 = *((uint32*)(p_ref - (inpitch << 1))); /* load 4 bytes */ 14961cc31e629e8132df390ae692873c847d1c2f62c0James Dong p_ref += inpitch; 14971cc31e629e8132df390ae692873c847d1c2f62c0James Dong r6 = (r0 >> 8) & 0xFF00FF; /* second and fourth byte */ 
14981cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 &= 0xFF00FF; 14991cc31e629e8132df390ae692873c847d1c2f62c0James Dong 15001cc31e629e8132df390ae692873c847d1c2f62c0James Dong /*p_ref8 = p_ref+(inpitch<<1); 15011cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 = p_ref8[0]; r7 = p_ref8[2]; r1 |= (r7<<16); 15021cc31e629e8132df390ae692873c847d1c2f62c0James Dong r7 = p_ref8[1]; r2 = p_ref8[3]; r7 |= (r2<<16);*/ 15031cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 = *((uint32*)(p_ref + (inpitch << 1))); /* r1, r7, ref[3] */ 15041cc31e629e8132df390ae692873c847d1c2f62c0James Dong r7 = (r1 >> 8) & 0xFF00FF; 15051cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 &= 0xFF00FF; 15061cc31e629e8132df390ae692873c847d1c2f62c0James Dong 15071cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 += r1; 15081cc31e629e8132df390ae692873c847d1c2f62c0James Dong r6 += r7; 15091cc31e629e8132df390ae692873c847d1c2f62c0James Dong 15101cc31e629e8132df390ae692873c847d1c2f62c0James Dong /*r2 = p_ref[0]; r8 = p_ref[2]; r2 |= (r8<<16); 15111cc31e629e8132df390ae692873c847d1c2f62c0James Dong r8 = p_ref[1]; r1 = p_ref[3]; r8 |= (r1<<16);*/ 15121cc31e629e8132df390ae692873c847d1c2f62c0James Dong r2 = *((uint32*)p_ref); /* r2, r8, ref[1] */ 15131cc31e629e8132df390ae692873c847d1c2f62c0James Dong r8 = (r2 >> 8) & 0xFF00FF; 15141cc31e629e8132df390ae692873c847d1c2f62c0James Dong r2 &= 0xFF00FF; 15151cc31e629e8132df390ae692873c847d1c2f62c0James Dong 15161cc31e629e8132df390ae692873c847d1c2f62c0James Dong /*p_ref8 = p_ref-inpitch; r1 = p_ref8[0]; r7 = p_ref8[2]; 15171cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 |= (r7<<16); r1 += r2; r7 = p_ref8[1]; 15181cc31e629e8132df390ae692873c847d1c2f62c0James Dong r2 = p_ref8[3]; r7 |= (r2<<16);*/ 15191cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 = *((uint32*)(p_ref - inpitch)); /* r1, r7, ref[0] */ 15201cc31e629e8132df390ae692873c847d1c2f62c0James Dong r7 = (r1 >> 8) & 0xFF00FF; 15211cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 &= 
0xFF00FF; 15221cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 += r2; 15231cc31e629e8132df390ae692873c847d1c2f62c0James Dong 15241cc31e629e8132df390ae692873c847d1c2f62c0James Dong r7 += r8; 15251cc31e629e8132df390ae692873c847d1c2f62c0James Dong 15261cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 += 20 * r1; 15271cc31e629e8132df390ae692873c847d1c2f62c0James Dong r6 += 20 * r7; 15281cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 += 0x100010; 15291cc31e629e8132df390ae692873c847d1c2f62c0James Dong r6 += 0x100010; 15301cc31e629e8132df390ae692873c847d1c2f62c0James Dong 15311cc31e629e8132df390ae692873c847d1c2f62c0James Dong /*p_ref8 = p_ref-(inpitch<<1); r2 = p_ref8[0]; r8 = p_ref8[2]; 15321cc31e629e8132df390ae692873c847d1c2f62c0James Dong r2 |= (r8<<16); r8 = p_ref8[1]; r1 = p_ref8[3]; r8 |= (r1<<16);*/ 15331cc31e629e8132df390ae692873c847d1c2f62c0James Dong r2 = *((uint32*)(p_ref - (inpitch << 1))); /* r2, r8, ref[-1] */ 15341cc31e629e8132df390ae692873c847d1c2f62c0James Dong r8 = (r2 >> 8) & 0xFF00FF; 15351cc31e629e8132df390ae692873c847d1c2f62c0James Dong r2 &= 0xFF00FF; 15361cc31e629e8132df390ae692873c847d1c2f62c0James Dong 15371cc31e629e8132df390ae692873c847d1c2f62c0James Dong /*p_ref8 = p_ref+inpitch; r1 = p_ref8[0]; r7 = p_ref8[2]; 15381cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 |= (r7<<16); r1 += r2; r7 = p_ref8[1]; 15391cc31e629e8132df390ae692873c847d1c2f62c0James Dong r2 = p_ref8[3]; r7 |= (r2<<16);*/ 15401cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 = *((uint32*)(p_ref + inpitch)); /* r1, r7, ref[2] */ 15411cc31e629e8132df390ae692873c847d1c2f62c0James Dong r7 = (r1 >> 8) & 0xFF00FF; 15421cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 &= 0xFF00FF; 15431cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 += r2; 15441cc31e629e8132df390ae692873c847d1c2f62c0James Dong 15451cc31e629e8132df390ae692873c847d1c2f62c0James Dong r7 += r8; 15461cc31e629e8132df390ae692873c847d1c2f62c0James Dong 
15471cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 -= 5 * r1; 15481cc31e629e8132df390ae692873c847d1c2f62c0James Dong r6 -= 5 * r7; 15491cc31e629e8132df390ae692873c847d1c2f62c0James Dong 15501cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 >>= 5; 15511cc31e629e8132df390ae692873c847d1c2f62c0James Dong r6 >>= 5; 15521cc31e629e8132df390ae692873c847d1c2f62c0James Dong /* clip */ 15531cc31e629e8132df390ae692873c847d1c2f62c0James Dong r13 |= r6; 15541cc31e629e8132df390ae692873c847d1c2f62c0James Dong r13 |= r0; 15551cc31e629e8132df390ae692873c847d1c2f62c0James Dong //CLIPPACK(r6,result) 15561cc31e629e8132df390ae692873c847d1c2f62c0James Dong /* add with horizontal results */ 15571cc31e629e8132df390ae692873c847d1c2f62c0James Dong r10 = *((uint32*)(p_tmp8 += 24)); 15581cc31e629e8132df390ae692873c847d1c2f62c0James Dong r9 = (r10 >> 8) & 0xFF00FF; 15591cc31e629e8132df390ae692873c847d1c2f62c0James Dong r10 &= 0xFF00FF; 15601cc31e629e8132df390ae692873c847d1c2f62c0James Dong 15611cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 += r10; 15621cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 += 0x10001; 15631cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 = (r0 >> 1) & 0xFF00FF; /* mask to 8 bytes */ 15641cc31e629e8132df390ae692873c847d1c2f62c0James Dong 15651cc31e629e8132df390ae692873c847d1c2f62c0James Dong r6 += r9; 15661cc31e629e8132df390ae692873c847d1c2f62c0James Dong r6 += 0x10001; 15671cc31e629e8132df390ae692873c847d1c2f62c0James Dong r6 = (r6 >> 1) & 0xFF00FF; /* mask to 8 bytes */ 15681cc31e629e8132df390ae692873c847d1c2f62c0James Dong 15691cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 |= (r6 << 8); /* pack it back */ 15701cc31e629e8132df390ae692873c847d1c2f62c0James Dong *((uint32*)(p_cur += outpitch)) = r0; 15711cc31e629e8132df390ae692873c847d1c2f62c0James Dong } 15721cc31e629e8132df390ae692873c847d1c2f62c0James Dong p_cur += curr_offset; /* offset to the next pixel */ 15731cc31e629e8132df390ae692873c847d1c2f62c0James Dong if (r13 & 
0xFF000700) /* this column need clipping */ 15741cc31e629e8132df390ae692873c847d1c2f62c0James Dong { 15751cc31e629e8132df390ae692873c847d1c2f62c0James Dong p_cur -= 4; 15761cc31e629e8132df390ae692873c847d1c2f62c0James Dong for (i = 0; i < 4; i++) 15771cc31e629e8132df390ae692873c847d1c2f62c0James Dong { 15781cc31e629e8132df390ae692873c847d1c2f62c0James Dong p_ref = in2 + i; 15791cc31e629e8132df390ae692873c847d1c2f62c0James Dong p_tmp8 = &(tmp_res[0][j+i]); /* intermediate result */ 15801cc31e629e8132df390ae692873c847d1c2f62c0James Dong p_tmp8 -= 24; /* compensate for the first offset */ 15811cc31e629e8132df390ae692873c847d1c2f62c0James Dong p_cur -= outpitch; /* compensate for the first offset */ 15821cc31e629e8132df390ae692873c847d1c2f62c0James Dong tmp = (uint32)(p_ref + pkres); /* limit */ 15831cc31e629e8132df390ae692873c847d1c2f62c0James Dong while ((uint32)p_ref < tmp) /* the loop un-rolled */ 15841cc31e629e8132df390ae692873c847d1c2f62c0James Dong { 15851cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 = *(p_ref - (inpitch << 1)); 15861cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 = *(p_ref - inpitch); 15871cc31e629e8132df390ae692873c847d1c2f62c0James Dong r2 = *p_ref; 15881cc31e629e8132df390ae692873c847d1c2f62c0James Dong r3 = *(p_ref += inpitch); /* modify pointer before loading */ 15891cc31e629e8132df390ae692873c847d1c2f62c0James Dong r4 = *(p_ref += inpitch); 15901cc31e629e8132df390ae692873c847d1c2f62c0James Dong /* first pixel */ 15911cc31e629e8132df390ae692873c847d1c2f62c0James Dong r5 = *(p_ref += inpitch); 15921cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (r0 + r5); 15931cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 = (r1 + r4); 15941cc31e629e8132df390ae692873c847d1c2f62c0James Dong result -= (r0 * 5);//result -= r0; result -= (r0<<2); 15951cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 = (r2 + r3); 15961cc31e629e8132df390ae692873c847d1c2f62c0James Dong result += (r0 * 20);//result += (r0<<4); result += (r0<<2); 
15971cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (result + 16) >> 5; 15981cc31e629e8132df390ae692873c847d1c2f62c0James Dong CLIP_RESULT(result) 15991cc31e629e8132df390ae692873c847d1c2f62c0James Dong tmp_result = *(p_tmp8 += 24); /* modify pointer before loading */ 16001cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (result + tmp_result + 1); /* no clip */ 16011cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (result >> 1); 16021cc31e629e8132df390ae692873c847d1c2f62c0James Dong *(p_cur += outpitch) = result; 16031cc31e629e8132df390ae692873c847d1c2f62c0James Dong /* second pixel */ 16041cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 = *(p_ref += inpitch); 16051cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (r1 + r0); 16061cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 = (r2 + r5); 16071cc31e629e8132df390ae692873c847d1c2f62c0James Dong result -= (r1 * 5);//result -= r1; result -= (r1<<2); 16081cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 = (r3 + r4); 16091cc31e629e8132df390ae692873c847d1c2f62c0James Dong result += (r1 * 20);//result += (r1<<4); result += (r1<<2); 16101cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (result + 16) >> 5; 16111cc31e629e8132df390ae692873c847d1c2f62c0James Dong CLIP_RESULT(result) 16121cc31e629e8132df390ae692873c847d1c2f62c0James Dong tmp_result = *(p_tmp8 += 24); /* intermediate result */ 16131cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (result + tmp_result + 1); /* no clip */ 16141cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (result >> 1); 16151cc31e629e8132df390ae692873c847d1c2f62c0James Dong *(p_cur += outpitch) = result; 16161cc31e629e8132df390ae692873c847d1c2f62c0James Dong /* third pixel */ 16171cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 = *(p_ref += inpitch); 16181cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (r2 + r1); 16191cc31e629e8132df390ae692873c847d1c2f62c0James Dong r2 = (r3 + r0); 
16201cc31e629e8132df390ae692873c847d1c2f62c0James Dong result -= (r2 * 5);//result -= r2; result -= (r2<<2); 16211cc31e629e8132df390ae692873c847d1c2f62c0James Dong r2 = (r4 + r5); 16221cc31e629e8132df390ae692873c847d1c2f62c0James Dong result += (r2 * 20);//result += (r2<<4); result += (r2<<2); 16231cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (result + 16) >> 5; 16241cc31e629e8132df390ae692873c847d1c2f62c0James Dong CLIP_RESULT(result) 16251cc31e629e8132df390ae692873c847d1c2f62c0James Dong tmp_result = *(p_tmp8 += 24); /* intermediate result */ 16261cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (result + tmp_result + 1); /* no clip */ 16271cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (result >> 1); 16281cc31e629e8132df390ae692873c847d1c2f62c0James Dong *(p_cur += outpitch) = result; 16291cc31e629e8132df390ae692873c847d1c2f62c0James Dong /* fourth pixel */ 16301cc31e629e8132df390ae692873c847d1c2f62c0James Dong r2 = *(p_ref += inpitch); 16311cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (r3 + r2); 16321cc31e629e8132df390ae692873c847d1c2f62c0James Dong r3 = (r4 + r1); 16331cc31e629e8132df390ae692873c847d1c2f62c0James Dong result -= (r3 * 5);//result -= r3; result -= (r3<<2); 16341cc31e629e8132df390ae692873c847d1c2f62c0James Dong r3 = (r5 + r0); 16351cc31e629e8132df390ae692873c847d1c2f62c0James Dong result += (r3 * 20);//result += (r3<<4); result += (r3<<2); 16361cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (result + 16) >> 5; 16371cc31e629e8132df390ae692873c847d1c2f62c0James Dong CLIP_RESULT(result) 16381cc31e629e8132df390ae692873c847d1c2f62c0James Dong tmp_result = *(p_tmp8 += 24); /* intermediate result */ 16391cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (result + tmp_result + 1); /* no clip */ 16401cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = (result >> 1); 16411cc31e629e8132df390ae692873c847d1c2f62c0James Dong *(p_cur += outpitch) = result; 
16421cc31e629e8132df390ae692873c847d1c2f62c0James Dong p_ref -= (inpitch << 1); /* move back to center of the filter of the next one */ 16431cc31e629e8132df390ae692873c847d1c2f62c0James Dong } 16441cc31e629e8132df390ae692873c847d1c2f62c0James Dong p_cur += (curr_offset - 3); 16451cc31e629e8132df390ae692873c847d1c2f62c0James Dong } 16461cc31e629e8132df390ae692873c847d1c2f62c0James Dong } 16471cc31e629e8132df390ae692873c847d1c2f62c0James Dong } 16481cc31e629e8132df390ae692873c847d1c2f62c0James Dong 16491cc31e629e8132df390ae692873c847d1c2f62c0James Dong return ; 16501cc31e629e8132df390ae692873c847d1c2f62c0James Dong} 16511cc31e629e8132df390ae692873c847d1c2f62c0James Dong 16521cc31e629e8132df390ae692873c847d1c2f62c0James Dong/* position G */ 16531cc31e629e8132df390ae692873c847d1c2f62c0James Dongvoid eFullPelMC(uint8 *in, int inpitch, uint8 *out, int outpitch, 16541cc31e629e8132df390ae692873c847d1c2f62c0James Dong int blkwidth, int blkheight) 16551cc31e629e8132df390ae692873c847d1c2f62c0James Dong{ 16561cc31e629e8132df390ae692873c847d1c2f62c0James Dong int i, j; 16571cc31e629e8132df390ae692873c847d1c2f62c0James Dong int offset_in = inpitch - blkwidth; 16581cc31e629e8132df390ae692873c847d1c2f62c0James Dong int offset_out = outpitch - blkwidth; 16591cc31e629e8132df390ae692873c847d1c2f62c0James Dong uint32 temp; 16601cc31e629e8132df390ae692873c847d1c2f62c0James Dong uint8 byte; 16611cc31e629e8132df390ae692873c847d1c2f62c0James Dong 16621cc31e629e8132df390ae692873c847d1c2f62c0James Dong if (((uint32)in)&3) 16631cc31e629e8132df390ae692873c847d1c2f62c0James Dong { 16641cc31e629e8132df390ae692873c847d1c2f62c0James Dong for (j = blkheight; j > 0; j--) 16651cc31e629e8132df390ae692873c847d1c2f62c0James Dong { 16661cc31e629e8132df390ae692873c847d1c2f62c0James Dong for (i = blkwidth; i > 0; i -= 4) 16671cc31e629e8132df390ae692873c847d1c2f62c0James Dong { 16681cc31e629e8132df390ae692873c847d1c2f62c0James Dong temp = *in++; 16691cc31e629e8132df390ae692873c847d1c2f62c0James Dong byte = 
*in++; 16701cc31e629e8132df390ae692873c847d1c2f62c0James Dong temp |= (byte << 8); 16711cc31e629e8132df390ae692873c847d1c2f62c0James Dong byte = *in++; 16721cc31e629e8132df390ae692873c847d1c2f62c0James Dong temp |= (byte << 16); 16731cc31e629e8132df390ae692873c847d1c2f62c0James Dong byte = *in++; 16741cc31e629e8132df390ae692873c847d1c2f62c0James Dong temp |= (byte << 24); 16751cc31e629e8132df390ae692873c847d1c2f62c0James Dong 16761cc31e629e8132df390ae692873c847d1c2f62c0James Dong *((uint32*)out) = temp; /* write 4 bytes */ 16771cc31e629e8132df390ae692873c847d1c2f62c0James Dong out += 4; 16781cc31e629e8132df390ae692873c847d1c2f62c0James Dong } 16791cc31e629e8132df390ae692873c847d1c2f62c0James Dong out += offset_out; 16801cc31e629e8132df390ae692873c847d1c2f62c0James Dong in += offset_in; 16811cc31e629e8132df390ae692873c847d1c2f62c0James Dong } 16821cc31e629e8132df390ae692873c847d1c2f62c0James Dong } 16831cc31e629e8132df390ae692873c847d1c2f62c0James Dong else 16841cc31e629e8132df390ae692873c847d1c2f62c0James Dong { 16851cc31e629e8132df390ae692873c847d1c2f62c0James Dong for (j = blkheight; j > 0; j--) 16861cc31e629e8132df390ae692873c847d1c2f62c0James Dong { 16871cc31e629e8132df390ae692873c847d1c2f62c0James Dong for (i = blkwidth; i > 0; i -= 4) 16881cc31e629e8132df390ae692873c847d1c2f62c0James Dong { 16891cc31e629e8132df390ae692873c847d1c2f62c0James Dong temp = *((uint32*)in); 16901cc31e629e8132df390ae692873c847d1c2f62c0James Dong *((uint32*)out) = temp; 16911cc31e629e8132df390ae692873c847d1c2f62c0James Dong in += 4; 16921cc31e629e8132df390ae692873c847d1c2f62c0James Dong out += 4; 16931cc31e629e8132df390ae692873c847d1c2f62c0James Dong } 16941cc31e629e8132df390ae692873c847d1c2f62c0James Dong out += offset_out; 16951cc31e629e8132df390ae692873c847d1c2f62c0James Dong in += offset_in; 16961cc31e629e8132df390ae692873c847d1c2f62c0James Dong } 16971cc31e629e8132df390ae692873c847d1c2f62c0James Dong } 16981cc31e629e8132df390ae692873c847d1c2f62c0James Dong return ; 
16991cc31e629e8132df390ae692873c847d1c2f62c0James Dong} 17001cc31e629e8132df390ae692873c847d1c2f62c0James Dong 17011cc31e629e8132df390ae692873c847d1c2f62c0James Dongvoid ePadChroma(uint8 *ref, int picwidth, int picheight, int picpitch, int x_pos, int y_pos) 17021cc31e629e8132df390ae692873c847d1c2f62c0James Dong{ 17031cc31e629e8132df390ae692873c847d1c2f62c0James Dong int pad_height; 17041cc31e629e8132df390ae692873c847d1c2f62c0James Dong int pad_width; 17051cc31e629e8132df390ae692873c847d1c2f62c0James Dong uint8 *start; 17061cc31e629e8132df390ae692873c847d1c2f62c0James Dong uint32 word1, word2, word3; 17071cc31e629e8132df390ae692873c847d1c2f62c0James Dong int offset, j; 17081cc31e629e8132df390ae692873c847d1c2f62c0James Dong 17091cc31e629e8132df390ae692873c847d1c2f62c0James Dong 17101cc31e629e8132df390ae692873c847d1c2f62c0James Dong pad_height = 8 + ((y_pos & 7) ? 1 : 0); 17111cc31e629e8132df390ae692873c847d1c2f62c0James Dong pad_width = 8 + ((x_pos & 7) ? 1 : 0); 17121cc31e629e8132df390ae692873c847d1c2f62c0James Dong 17131cc31e629e8132df390ae692873c847d1c2f62c0James Dong y_pos >>= 3; 17141cc31e629e8132df390ae692873c847d1c2f62c0James Dong x_pos >>= 3; 17151cc31e629e8132df390ae692873c847d1c2f62c0James Dong // pad vertical first 17161cc31e629e8132df390ae692873c847d1c2f62c0James Dong if (y_pos < 0) // need to pad up 17171cc31e629e8132df390ae692873c847d1c2f62c0James Dong { 17181cc31e629e8132df390ae692873c847d1c2f62c0James Dong if (x_pos < -8) start = ref - 8; 17191cc31e629e8132df390ae692873c847d1c2f62c0James Dong else if (x_pos + pad_width > picwidth + 7) start = ref + picwidth + 7 - pad_width; 17201cc31e629e8132df390ae692873c847d1c2f62c0James Dong else start = ref + x_pos; 17211cc31e629e8132df390ae692873c847d1c2f62c0James Dong 17221cc31e629e8132df390ae692873c847d1c2f62c0James Dong /* word-align start */ 17231cc31e629e8132df390ae692873c847d1c2f62c0James Dong offset = (uint32)start & 0x3; 17241cc31e629e8132df390ae692873c847d1c2f62c0James Dong if (offset) start -= offset; 
17251cc31e629e8132df390ae692873c847d1c2f62c0James Dong 17261cc31e629e8132df390ae692873c847d1c2f62c0James Dong word1 = *((uint32*)start); 17271cc31e629e8132df390ae692873c847d1c2f62c0James Dong word2 = *((uint32*)(start + 4)); 17281cc31e629e8132df390ae692873c847d1c2f62c0James Dong word3 = *((uint32*)(start + 8)); 17291cc31e629e8132df390ae692873c847d1c2f62c0James Dong 17301cc31e629e8132df390ae692873c847d1c2f62c0James Dong /* pad up N rows */ 17311cc31e629e8132df390ae692873c847d1c2f62c0James Dong j = -y_pos; 17321cc31e629e8132df390ae692873c847d1c2f62c0James Dong if (j > 8) j = 8; 17331cc31e629e8132df390ae692873c847d1c2f62c0James Dong while (j--) 17341cc31e629e8132df390ae692873c847d1c2f62c0James Dong { 17351cc31e629e8132df390ae692873c847d1c2f62c0James Dong *((uint32*)(start -= picpitch)) = word1; 17361cc31e629e8132df390ae692873c847d1c2f62c0James Dong *((uint32*)(start + 4)) = word2; 17371cc31e629e8132df390ae692873c847d1c2f62c0James Dong *((uint32*)(start + 8)) = word3; 17381cc31e629e8132df390ae692873c847d1c2f62c0James Dong } 17391cc31e629e8132df390ae692873c847d1c2f62c0James Dong 17401cc31e629e8132df390ae692873c847d1c2f62c0James Dong } 17411cc31e629e8132df390ae692873c847d1c2f62c0James Dong else if (y_pos + pad_height >= picheight) /* pad down */ 17421cc31e629e8132df390ae692873c847d1c2f62c0James Dong { 17431cc31e629e8132df390ae692873c847d1c2f62c0James Dong if (x_pos < -8) start = ref + picpitch * (picheight - 1) - 8; 17441cc31e629e8132df390ae692873c847d1c2f62c0James Dong else if (x_pos + pad_width > picwidth + 7) start = ref + picpitch * (picheight - 1) + 17451cc31e629e8132df390ae692873c847d1c2f62c0James Dong picwidth + 7 - pad_width; 17461cc31e629e8132df390ae692873c847d1c2f62c0James Dong else start = ref + picpitch * (picheight - 1) + x_pos; 17471cc31e629e8132df390ae692873c847d1c2f62c0James Dong 17481cc31e629e8132df390ae692873c847d1c2f62c0James Dong /* word-align start */ 17491cc31e629e8132df390ae692873c847d1c2f62c0James Dong offset = (uint32)start & 0x3; 
17501cc31e629e8132df390ae692873c847d1c2f62c0James Dong if (offset) start -= offset; 17511cc31e629e8132df390ae692873c847d1c2f62c0James Dong 17521cc31e629e8132df390ae692873c847d1c2f62c0James Dong word1 = *((uint32*)start); 17531cc31e629e8132df390ae692873c847d1c2f62c0James Dong word2 = *((uint32*)(start + 4)); 17541cc31e629e8132df390ae692873c847d1c2f62c0James Dong word3 = *((uint32*)(start + 8)); 17551cc31e629e8132df390ae692873c847d1c2f62c0James Dong 17561cc31e629e8132df390ae692873c847d1c2f62c0James Dong /* pad down N rows */ 17571cc31e629e8132df390ae692873c847d1c2f62c0James Dong j = y_pos + pad_height - picheight; 17581cc31e629e8132df390ae692873c847d1c2f62c0James Dong if (j > 8) j = 8; 17591cc31e629e8132df390ae692873c847d1c2f62c0James Dong while (j--) 17601cc31e629e8132df390ae692873c847d1c2f62c0James Dong { 17611cc31e629e8132df390ae692873c847d1c2f62c0James Dong *((uint32*)(start += picpitch)) = word1; 17621cc31e629e8132df390ae692873c847d1c2f62c0James Dong *((uint32*)(start + 4)) = word2; 17631cc31e629e8132df390ae692873c847d1c2f62c0James Dong *((uint32*)(start + 8)) = word3; 17641cc31e629e8132df390ae692873c847d1c2f62c0James Dong } 17651cc31e629e8132df390ae692873c847d1c2f62c0James Dong } 17661cc31e629e8132df390ae692873c847d1c2f62c0James Dong 17671cc31e629e8132df390ae692873c847d1c2f62c0James Dong /* now pad horizontal */ 17681cc31e629e8132df390ae692873c847d1c2f62c0James Dong if (x_pos < 0) // pad left 17691cc31e629e8132df390ae692873c847d1c2f62c0James Dong { 17701cc31e629e8132df390ae692873c847d1c2f62c0James Dong if (y_pos < -8) start = ref - (picpitch << 3); 17711cc31e629e8132df390ae692873c847d1c2f62c0James Dong else if (y_pos + pad_height > picheight + 7) start = ref + (picheight + 7 - pad_height) * picpitch; 17721cc31e629e8132df390ae692873c847d1c2f62c0James Dong else start = ref + y_pos * picpitch; 17731cc31e629e8132df390ae692873c847d1c2f62c0James Dong 17741cc31e629e8132df390ae692873c847d1c2f62c0James Dong // now pad left 8 pixels for pad_height rows */ 
17751cc31e629e8132df390ae692873c847d1c2f62c0James Dong j = pad_height; 17761cc31e629e8132df390ae692873c847d1c2f62c0James Dong start -= picpitch; 17771cc31e629e8132df390ae692873c847d1c2f62c0James Dong while (j--) 17781cc31e629e8132df390ae692873c847d1c2f62c0James Dong { 17791cc31e629e8132df390ae692873c847d1c2f62c0James Dong word1 = *(start += picpitch); 17801cc31e629e8132df390ae692873c847d1c2f62c0James Dong word1 |= (word1 << 8); 17811cc31e629e8132df390ae692873c847d1c2f62c0James Dong word1 |= (word1 << 16); 17821cc31e629e8132df390ae692873c847d1c2f62c0James Dong *((uint32*)(start - 8)) = word1; 17831cc31e629e8132df390ae692873c847d1c2f62c0James Dong *((uint32*)(start - 4)) = word1; 17841cc31e629e8132df390ae692873c847d1c2f62c0James Dong } 17851cc31e629e8132df390ae692873c847d1c2f62c0James Dong } 17861cc31e629e8132df390ae692873c847d1c2f62c0James Dong else if (x_pos + pad_width >= picwidth) /* pad right */ 17871cc31e629e8132df390ae692873c847d1c2f62c0James Dong { 17881cc31e629e8132df390ae692873c847d1c2f62c0James Dong if (y_pos < -8) start = ref - (picpitch << 3) + picwidth - 1; 17891cc31e629e8132df390ae692873c847d1c2f62c0James Dong else if (y_pos + pad_height > picheight + 7) start = ref + (picheight + 7 - pad_height) * picpitch + picwidth - 1; 17901cc31e629e8132df390ae692873c847d1c2f62c0James Dong else start = ref + y_pos * picpitch + picwidth - 1; 17911cc31e629e8132df390ae692873c847d1c2f62c0James Dong 17921cc31e629e8132df390ae692873c847d1c2f62c0James Dong // now pad right 8 pixels for pad_height rows */ 17931cc31e629e8132df390ae692873c847d1c2f62c0James Dong j = pad_height; 17941cc31e629e8132df390ae692873c847d1c2f62c0James Dong start -= picpitch; 17951cc31e629e8132df390ae692873c847d1c2f62c0James Dong while (j--) 17961cc31e629e8132df390ae692873c847d1c2f62c0James Dong { 17971cc31e629e8132df390ae692873c847d1c2f62c0James Dong word1 = *(start += picpitch); 17981cc31e629e8132df390ae692873c847d1c2f62c0James Dong word1 |= (word1 << 8); 
17991cc31e629e8132df390ae692873c847d1c2f62c0James Dong word1 |= (word1 << 16); 18001cc31e629e8132df390ae692873c847d1c2f62c0James Dong *((uint32*)(start + 1)) = word1; 18011cc31e629e8132df390ae692873c847d1c2f62c0James Dong *((uint32*)(start + 5)) = word1; 18021cc31e629e8132df390ae692873c847d1c2f62c0James Dong } 18031cc31e629e8132df390ae692873c847d1c2f62c0James Dong } 18041cc31e629e8132df390ae692873c847d1c2f62c0James Dong 18051cc31e629e8132df390ae692873c847d1c2f62c0James Dong return ; 18061cc31e629e8132df390ae692873c847d1c2f62c0James Dong} 18071cc31e629e8132df390ae692873c847d1c2f62c0James Dong 18081cc31e629e8132df390ae692873c847d1c2f62c0James Dong 18091cc31e629e8132df390ae692873c847d1c2f62c0James Dongvoid eChromaMotionComp(uint8 *ref, int picwidth, int picheight, 18101cc31e629e8132df390ae692873c847d1c2f62c0James Dong int x_pos, int y_pos, 18111cc31e629e8132df390ae692873c847d1c2f62c0James Dong uint8 *pred, int picpitch, 18121cc31e629e8132df390ae692873c847d1c2f62c0James Dong int blkwidth, int blkheight) 18131cc31e629e8132df390ae692873c847d1c2f62c0James Dong{ 18141cc31e629e8132df390ae692873c847d1c2f62c0James Dong int dx, dy; 18151cc31e629e8132df390ae692873c847d1c2f62c0James Dong int offset_dx, offset_dy; 18161cc31e629e8132df390ae692873c847d1c2f62c0James Dong int index; 18171cc31e629e8132df390ae692873c847d1c2f62c0James Dong 18181cc31e629e8132df390ae692873c847d1c2f62c0James Dong ePadChroma(ref, picwidth, picheight, picpitch, x_pos, y_pos); 18191cc31e629e8132df390ae692873c847d1c2f62c0James Dong 18201cc31e629e8132df390ae692873c847d1c2f62c0James Dong dx = x_pos & 7; 18211cc31e629e8132df390ae692873c847d1c2f62c0James Dong dy = y_pos & 7; 18221cc31e629e8132df390ae692873c847d1c2f62c0James Dong offset_dx = (dx + 7) >> 3; 18231cc31e629e8132df390ae692873c847d1c2f62c0James Dong offset_dy = (dy + 7) >> 3; 18241cc31e629e8132df390ae692873c847d1c2f62c0James Dong x_pos = x_pos >> 3; /* round it to full-pel resolution */ 18251cc31e629e8132df390ae692873c847d1c2f62c0James Dong y_pos = y_pos 
>> 3; 18261cc31e629e8132df390ae692873c847d1c2f62c0James Dong 18271cc31e629e8132df390ae692873c847d1c2f62c0James Dong ref += y_pos * picpitch + x_pos; 18281cc31e629e8132df390ae692873c847d1c2f62c0James Dong 18291cc31e629e8132df390ae692873c847d1c2f62c0James Dong index = offset_dx + (offset_dy << 1) + ((blkwidth << 1) & 0x7); 18301cc31e629e8132df390ae692873c847d1c2f62c0James Dong 18311cc31e629e8132df390ae692873c847d1c2f62c0James Dong (*(eChromaMC_SIMD[index]))(ref, picpitch , dx, dy, pred, picpitch, blkwidth, blkheight); 18321cc31e629e8132df390ae692873c847d1c2f62c0James Dong return ; 18331cc31e629e8132df390ae692873c847d1c2f62c0James Dong} 18341cc31e629e8132df390ae692873c847d1c2f62c0James Dong 18351cc31e629e8132df390ae692873c847d1c2f62c0James Dong 18361cc31e629e8132df390ae692873c847d1c2f62c0James Dong/* SIMD routines, unroll the loops in vertical direction, decreasing loops (things to be done) */ 18371cc31e629e8132df390ae692873c847d1c2f62c0James Dongvoid eChromaDiagonalMC_SIMD(uint8 *pRef, int srcPitch, int dx, int dy, 18381cc31e629e8132df390ae692873c847d1c2f62c0James Dong uint8 *pOut, int predPitch, int blkwidth, int blkheight) 18391cc31e629e8132df390ae692873c847d1c2f62c0James Dong{ 18401cc31e629e8132df390ae692873c847d1c2f62c0James Dong int32 r0, r1, r2, r3, result0, result1; 18411cc31e629e8132df390ae692873c847d1c2f62c0James Dong uint8 temp[288]; 18421cc31e629e8132df390ae692873c847d1c2f62c0James Dong uint8 *ref, *out; 18431cc31e629e8132df390ae692873c847d1c2f62c0James Dong int i, j; 18441cc31e629e8132df390ae692873c847d1c2f62c0James Dong int dx_8 = 8 - dx; 18451cc31e629e8132df390ae692873c847d1c2f62c0James Dong int dy_8 = 8 - dy; 18461cc31e629e8132df390ae692873c847d1c2f62c0James Dong 18471cc31e629e8132df390ae692873c847d1c2f62c0James Dong /* horizontal first */ 18481cc31e629e8132df390ae692873c847d1c2f62c0James Dong out = temp; 18491cc31e629e8132df390ae692873c847d1c2f62c0James Dong for (i = 0; i < blkheight + 1; i++) 18501cc31e629e8132df390ae692873c847d1c2f62c0James Dong { 
18511cc31e629e8132df390ae692873c847d1c2f62c0James Dong ref = pRef; 18521cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 = ref[0]; 18531cc31e629e8132df390ae692873c847d1c2f62c0James Dong for (j = 0; j < blkwidth; j += 4) 18541cc31e629e8132df390ae692873c847d1c2f62c0James Dong { 18551cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 |= (ref[2] << 16); 18561cc31e629e8132df390ae692873c847d1c2f62c0James Dong result0 = dx_8 * r0; 18571cc31e629e8132df390ae692873c847d1c2f62c0James Dong 18581cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 = ref[1] | (ref[3] << 16); 18591cc31e629e8132df390ae692873c847d1c2f62c0James Dong result0 += dx * r1; 18601cc31e629e8132df390ae692873c847d1c2f62c0James Dong *(int32 *)out = result0; 18611cc31e629e8132df390ae692873c847d1c2f62c0James Dong 18621cc31e629e8132df390ae692873c847d1c2f62c0James Dong result0 = dx_8 * r1; 18631cc31e629e8132df390ae692873c847d1c2f62c0James Dong 18641cc31e629e8132df390ae692873c847d1c2f62c0James Dong r2 = ref[4]; 18651cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 = r0 >> 16; 18661cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 = r0 | (r2 << 16); 18671cc31e629e8132df390ae692873c847d1c2f62c0James Dong result0 += dx * r1; 18681cc31e629e8132df390ae692873c847d1c2f62c0James Dong *(int32 *)(out + 16) = result0; 18691cc31e629e8132df390ae692873c847d1c2f62c0James Dong 18701cc31e629e8132df390ae692873c847d1c2f62c0James Dong ref += 4; 18711cc31e629e8132df390ae692873c847d1c2f62c0James Dong out += 4; 18721cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 = r2; 18731cc31e629e8132df390ae692873c847d1c2f62c0James Dong } 18741cc31e629e8132df390ae692873c847d1c2f62c0James Dong pRef += srcPitch; 18751cc31e629e8132df390ae692873c847d1c2f62c0James Dong out += (32 - blkwidth); 18761cc31e629e8132df390ae692873c847d1c2f62c0James Dong } 18771cc31e629e8132df390ae692873c847d1c2f62c0James Dong 18781cc31e629e8132df390ae692873c847d1c2f62c0James Dong// pRef -= srcPitch*(blkheight+1); 
18791cc31e629e8132df390ae692873c847d1c2f62c0James Dong ref = temp; 18801cc31e629e8132df390ae692873c847d1c2f62c0James Dong 18811cc31e629e8132df390ae692873c847d1c2f62c0James Dong for (j = 0; j < blkwidth; j += 4) 18821cc31e629e8132df390ae692873c847d1c2f62c0James Dong { 18831cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 = *(int32 *)ref; 18841cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 = *(int32 *)(ref + 16); 18851cc31e629e8132df390ae692873c847d1c2f62c0James Dong ref += 32; 18861cc31e629e8132df390ae692873c847d1c2f62c0James Dong out = pOut; 18871cc31e629e8132df390ae692873c847d1c2f62c0James Dong for (i = 0; i < (blkheight >> 1); i++) 18881cc31e629e8132df390ae692873c847d1c2f62c0James Dong { 18891cc31e629e8132df390ae692873c847d1c2f62c0James Dong result0 = dy_8 * r0 + 0x00200020; 18901cc31e629e8132df390ae692873c847d1c2f62c0James Dong r2 = *(int32 *)ref; 18911cc31e629e8132df390ae692873c847d1c2f62c0James Dong result0 += dy * r2; 18921cc31e629e8132df390ae692873c847d1c2f62c0James Dong result0 >>= 6; 18931cc31e629e8132df390ae692873c847d1c2f62c0James Dong result0 &= 0x00FF00FF; 18941cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 = r2; 18951cc31e629e8132df390ae692873c847d1c2f62c0James Dong 18961cc31e629e8132df390ae692873c847d1c2f62c0James Dong result1 = dy_8 * r1 + 0x00200020; 18971cc31e629e8132df390ae692873c847d1c2f62c0James Dong r3 = *(int32 *)(ref + 16); 18981cc31e629e8132df390ae692873c847d1c2f62c0James Dong result1 += dy * r3; 18991cc31e629e8132df390ae692873c847d1c2f62c0James Dong result1 >>= 6; 19001cc31e629e8132df390ae692873c847d1c2f62c0James Dong result1 &= 0x00FF00FF; 19011cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 = r3; 19021cc31e629e8132df390ae692873c847d1c2f62c0James Dong *(int32 *)out = result0 | (result1 << 8); 19031cc31e629e8132df390ae692873c847d1c2f62c0James Dong out += predPitch; 19041cc31e629e8132df390ae692873c847d1c2f62c0James Dong ref += 32; 19051cc31e629e8132df390ae692873c847d1c2f62c0James Dong 
19061cc31e629e8132df390ae692873c847d1c2f62c0James Dong result0 = dy_8 * r0 + 0x00200020; 19071cc31e629e8132df390ae692873c847d1c2f62c0James Dong r2 = *(int32 *)ref; 19081cc31e629e8132df390ae692873c847d1c2f62c0James Dong result0 += dy * r2; 19091cc31e629e8132df390ae692873c847d1c2f62c0James Dong result0 >>= 6; 19101cc31e629e8132df390ae692873c847d1c2f62c0James Dong result0 &= 0x00FF00FF; 19111cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 = r2; 19121cc31e629e8132df390ae692873c847d1c2f62c0James Dong 19131cc31e629e8132df390ae692873c847d1c2f62c0James Dong result1 = dy_8 * r1 + 0x00200020; 19141cc31e629e8132df390ae692873c847d1c2f62c0James Dong r3 = *(int32 *)(ref + 16); 19151cc31e629e8132df390ae692873c847d1c2f62c0James Dong result1 += dy * r3; 19161cc31e629e8132df390ae692873c847d1c2f62c0James Dong result1 >>= 6; 19171cc31e629e8132df390ae692873c847d1c2f62c0James Dong result1 &= 0x00FF00FF; 19181cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 = r3; 19191cc31e629e8132df390ae692873c847d1c2f62c0James Dong *(int32 *)out = result0 | (result1 << 8); 19201cc31e629e8132df390ae692873c847d1c2f62c0James Dong out += predPitch; 19211cc31e629e8132df390ae692873c847d1c2f62c0James Dong ref += 32; 19221cc31e629e8132df390ae692873c847d1c2f62c0James Dong } 19231cc31e629e8132df390ae692873c847d1c2f62c0James Dong pOut += 4; 19241cc31e629e8132df390ae692873c847d1c2f62c0James Dong ref = temp + 4; /* since it can only iterate twice max */ 19251cc31e629e8132df390ae692873c847d1c2f62c0James Dong } 19261cc31e629e8132df390ae692873c847d1c2f62c0James Dong return; 19271cc31e629e8132df390ae692873c847d1c2f62c0James Dong} 19281cc31e629e8132df390ae692873c847d1c2f62c0James Dong 19291cc31e629e8132df390ae692873c847d1c2f62c0James Dongvoid eChromaHorizontalMC_SIMD(uint8 *pRef, int srcPitch, int dx, int dy, 19301cc31e629e8132df390ae692873c847d1c2f62c0James Dong uint8 *pOut, int predPitch, int blkwidth, int blkheight) 19311cc31e629e8132df390ae692873c847d1c2f62c0James Dong{ 
19321cc31e629e8132df390ae692873c847d1c2f62c0James Dong (void)(dy); 19331cc31e629e8132df390ae692873c847d1c2f62c0James Dong 19341cc31e629e8132df390ae692873c847d1c2f62c0James Dong int32 r0, r1, r2, result0, result1; 19351cc31e629e8132df390ae692873c847d1c2f62c0James Dong uint8 *ref, *out; 19361cc31e629e8132df390ae692873c847d1c2f62c0James Dong int i, j; 19371cc31e629e8132df390ae692873c847d1c2f62c0James Dong int dx_8 = 8 - dx; 19381cc31e629e8132df390ae692873c847d1c2f62c0James Dong 19391cc31e629e8132df390ae692873c847d1c2f62c0James Dong /* horizontal first */ 19401cc31e629e8132df390ae692873c847d1c2f62c0James Dong for (i = 0; i < blkheight; i++) 19411cc31e629e8132df390ae692873c847d1c2f62c0James Dong { 19421cc31e629e8132df390ae692873c847d1c2f62c0James Dong ref = pRef; 19431cc31e629e8132df390ae692873c847d1c2f62c0James Dong out = pOut; 19441cc31e629e8132df390ae692873c847d1c2f62c0James Dong 19451cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 = ref[0]; 19461cc31e629e8132df390ae692873c847d1c2f62c0James Dong for (j = 0; j < blkwidth; j += 4) 19471cc31e629e8132df390ae692873c847d1c2f62c0James Dong { 19481cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 |= (ref[2] << 16); 19491cc31e629e8132df390ae692873c847d1c2f62c0James Dong result0 = dx_8 * r0 + 0x00040004; 19501cc31e629e8132df390ae692873c847d1c2f62c0James Dong 19511cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 = ref[1] | (ref[3] << 16); 19521cc31e629e8132df390ae692873c847d1c2f62c0James Dong result0 += dx * r1; 19531cc31e629e8132df390ae692873c847d1c2f62c0James Dong result0 >>= 3; 19541cc31e629e8132df390ae692873c847d1c2f62c0James Dong result0 &= 0x00FF00FF; 19551cc31e629e8132df390ae692873c847d1c2f62c0James Dong 19561cc31e629e8132df390ae692873c847d1c2f62c0James Dong result1 = dx_8 * r1 + 0x00040004; 19571cc31e629e8132df390ae692873c847d1c2f62c0James Dong 19581cc31e629e8132df390ae692873c847d1c2f62c0James Dong r2 = ref[4]; 19591cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 = r0 >> 16; 
19601cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 = r0 | (r2 << 16); 19611cc31e629e8132df390ae692873c847d1c2f62c0James Dong result1 += dx * r1; 19621cc31e629e8132df390ae692873c847d1c2f62c0James Dong result1 >>= 3; 19631cc31e629e8132df390ae692873c847d1c2f62c0James Dong result1 &= 0x00FF00FF; 19641cc31e629e8132df390ae692873c847d1c2f62c0James Dong 19651cc31e629e8132df390ae692873c847d1c2f62c0James Dong *(int32 *)out = result0 | (result1 << 8); 19661cc31e629e8132df390ae692873c847d1c2f62c0James Dong 19671cc31e629e8132df390ae692873c847d1c2f62c0James Dong ref += 4; 19681cc31e629e8132df390ae692873c847d1c2f62c0James Dong out += 4; 19691cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 = r2; 19701cc31e629e8132df390ae692873c847d1c2f62c0James Dong } 19711cc31e629e8132df390ae692873c847d1c2f62c0James Dong 19721cc31e629e8132df390ae692873c847d1c2f62c0James Dong pRef += srcPitch; 19731cc31e629e8132df390ae692873c847d1c2f62c0James Dong pOut += predPitch; 19741cc31e629e8132df390ae692873c847d1c2f62c0James Dong } 19751cc31e629e8132df390ae692873c847d1c2f62c0James Dong return; 19761cc31e629e8132df390ae692873c847d1c2f62c0James Dong} 19771cc31e629e8132df390ae692873c847d1c2f62c0James Dong 19781cc31e629e8132df390ae692873c847d1c2f62c0James Dongvoid eChromaVerticalMC_SIMD(uint8 *pRef, int srcPitch, int dx, int dy, 19791cc31e629e8132df390ae692873c847d1c2f62c0James Dong uint8 *pOut, int predPitch, int blkwidth, int blkheight) 19801cc31e629e8132df390ae692873c847d1c2f62c0James Dong{ 19811cc31e629e8132df390ae692873c847d1c2f62c0James Dong (void)(dx); 19821cc31e629e8132df390ae692873c847d1c2f62c0James Dong 19831cc31e629e8132df390ae692873c847d1c2f62c0James Dong int32 r0, r1, r2, r3, result0, result1; 19841cc31e629e8132df390ae692873c847d1c2f62c0James Dong int i, j; 19851cc31e629e8132df390ae692873c847d1c2f62c0James Dong uint8 *ref, *out; 19861cc31e629e8132df390ae692873c847d1c2f62c0James Dong int dy_8 = 8 - dy; 19871cc31e629e8132df390ae692873c847d1c2f62c0James Dong /* vertical first */ 
19881cc31e629e8132df390ae692873c847d1c2f62c0James Dong for (i = 0; i < blkwidth; i += 4) 19891cc31e629e8132df390ae692873c847d1c2f62c0James Dong { 19901cc31e629e8132df390ae692873c847d1c2f62c0James Dong ref = pRef; 19911cc31e629e8132df390ae692873c847d1c2f62c0James Dong out = pOut; 19921cc31e629e8132df390ae692873c847d1c2f62c0James Dong 19931cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 = ref[0] | (ref[2] << 16); 19941cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 = ref[1] | (ref[3] << 16); 19951cc31e629e8132df390ae692873c847d1c2f62c0James Dong ref += srcPitch; 19961cc31e629e8132df390ae692873c847d1c2f62c0James Dong for (j = 0; j < blkheight; j++) 19971cc31e629e8132df390ae692873c847d1c2f62c0James Dong { 19981cc31e629e8132df390ae692873c847d1c2f62c0James Dong result0 = dy_8 * r0 + 0x00040004; 19991cc31e629e8132df390ae692873c847d1c2f62c0James Dong r2 = ref[0] | (ref[2] << 16); 20001cc31e629e8132df390ae692873c847d1c2f62c0James Dong result0 += dy * r2; 20011cc31e629e8132df390ae692873c847d1c2f62c0James Dong result0 >>= 3; 20021cc31e629e8132df390ae692873c847d1c2f62c0James Dong result0 &= 0x00FF00FF; 20031cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 = r2; 20041cc31e629e8132df390ae692873c847d1c2f62c0James Dong 20051cc31e629e8132df390ae692873c847d1c2f62c0James Dong result1 = dy_8 * r1 + 0x00040004; 20061cc31e629e8132df390ae692873c847d1c2f62c0James Dong r3 = ref[1] | (ref[3] << 16); 20071cc31e629e8132df390ae692873c847d1c2f62c0James Dong result1 += dy * r3; 20081cc31e629e8132df390ae692873c847d1c2f62c0James Dong result1 >>= 3; 20091cc31e629e8132df390ae692873c847d1c2f62c0James Dong result1 &= 0x00FF00FF; 20101cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 = r3; 20111cc31e629e8132df390ae692873c847d1c2f62c0James Dong *(int32 *)out = result0 | (result1 << 8); 20121cc31e629e8132df390ae692873c847d1c2f62c0James Dong ref += srcPitch; 20131cc31e629e8132df390ae692873c847d1c2f62c0James Dong out += predPitch; 20141cc31e629e8132df390ae692873c847d1c2f62c0James Dong } 
20151cc31e629e8132df390ae692873c847d1c2f62c0James Dong pOut += 4; 20161cc31e629e8132df390ae692873c847d1c2f62c0James Dong pRef += 4; 20171cc31e629e8132df390ae692873c847d1c2f62c0James Dong } 20181cc31e629e8132df390ae692873c847d1c2f62c0James Dong return; 20191cc31e629e8132df390ae692873c847d1c2f62c0James Dong} 20201cc31e629e8132df390ae692873c847d1c2f62c0James Dong 20211cc31e629e8132df390ae692873c847d1c2f62c0James Dongvoid eChromaDiagonalMC2_SIMD(uint8 *pRef, int srcPitch, int dx, int dy, 20221cc31e629e8132df390ae692873c847d1c2f62c0James Dong uint8 *pOut, int predPitch, int blkwidth, int blkheight) 20231cc31e629e8132df390ae692873c847d1c2f62c0James Dong{ 20241cc31e629e8132df390ae692873c847d1c2f62c0James Dong (void)(blkwidth); 20251cc31e629e8132df390ae692873c847d1c2f62c0James Dong 20261cc31e629e8132df390ae692873c847d1c2f62c0James Dong int32 r0, r1, temp0, temp1, result; 20271cc31e629e8132df390ae692873c847d1c2f62c0James Dong int32 temp[9]; 20281cc31e629e8132df390ae692873c847d1c2f62c0James Dong int32 *out; 20291cc31e629e8132df390ae692873c847d1c2f62c0James Dong int i, r_temp; 20301cc31e629e8132df390ae692873c847d1c2f62c0James Dong int dy_8 = 8 - dy; 20311cc31e629e8132df390ae692873c847d1c2f62c0James Dong 20321cc31e629e8132df390ae692873c847d1c2f62c0James Dong /* horizontal first */ 20331cc31e629e8132df390ae692873c847d1c2f62c0James Dong out = temp; 20341cc31e629e8132df390ae692873c847d1c2f62c0James Dong for (i = 0; i < blkheight + 1; i++) 20351cc31e629e8132df390ae692873c847d1c2f62c0James Dong { 20361cc31e629e8132df390ae692873c847d1c2f62c0James Dong r_temp = pRef[1]; 20371cc31e629e8132df390ae692873c847d1c2f62c0James Dong temp0 = (pRef[0] << 3) + dx * (r_temp - pRef[0]); 20381cc31e629e8132df390ae692873c847d1c2f62c0James Dong temp1 = (r_temp << 3) + dx * (pRef[2] - r_temp); 20391cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 = temp0 | (temp1 << 16); 20401cc31e629e8132df390ae692873c847d1c2f62c0James Dong *out++ = r0; 20411cc31e629e8132df390ae692873c847d1c2f62c0James Dong pRef 
+= srcPitch; 20421cc31e629e8132df390ae692873c847d1c2f62c0James Dong } 20431cc31e629e8132df390ae692873c847d1c2f62c0James Dong 20441cc31e629e8132df390ae692873c847d1c2f62c0James Dong pRef -= srcPitch * (blkheight + 1); 20451cc31e629e8132df390ae692873c847d1c2f62c0James Dong 20461cc31e629e8132df390ae692873c847d1c2f62c0James Dong out = temp; 20471cc31e629e8132df390ae692873c847d1c2f62c0James Dong 20481cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 = *out++; 20491cc31e629e8132df390ae692873c847d1c2f62c0James Dong 20501cc31e629e8132df390ae692873c847d1c2f62c0James Dong for (i = 0; i < blkheight; i++) 20511cc31e629e8132df390ae692873c847d1c2f62c0James Dong { 20521cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = dy_8 * r0 + 0x00200020; 20531cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 = *out++; 20541cc31e629e8132df390ae692873c847d1c2f62c0James Dong result += dy * r1; 20551cc31e629e8132df390ae692873c847d1c2f62c0James Dong result >>= 6; 20561cc31e629e8132df390ae692873c847d1c2f62c0James Dong result &= 0x00FF00FF; 20571cc31e629e8132df390ae692873c847d1c2f62c0James Dong *(int16 *)pOut = (result >> 8) | (result & 0xFF); 20581cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 = r1; 20591cc31e629e8132df390ae692873c847d1c2f62c0James Dong pOut += predPitch; 20601cc31e629e8132df390ae692873c847d1c2f62c0James Dong } 20611cc31e629e8132df390ae692873c847d1c2f62c0James Dong return; 20621cc31e629e8132df390ae692873c847d1c2f62c0James Dong} 20631cc31e629e8132df390ae692873c847d1c2f62c0James Dong 20641cc31e629e8132df390ae692873c847d1c2f62c0James Dongvoid eChromaHorizontalMC2_SIMD(uint8 *pRef, int srcPitch, int dx, int dy, 20651cc31e629e8132df390ae692873c847d1c2f62c0James Dong uint8 *pOut, int predPitch, int blkwidth, int blkheight) 20661cc31e629e8132df390ae692873c847d1c2f62c0James Dong{ 20671cc31e629e8132df390ae692873c847d1c2f62c0James Dong (void)(dy); 20681cc31e629e8132df390ae692873c847d1c2f62c0James Dong (void)(blkwidth); 20691cc31e629e8132df390ae692873c847d1c2f62c0James 
Dong 20701cc31e629e8132df390ae692873c847d1c2f62c0James Dong int i, temp, temp0, temp1; 20711cc31e629e8132df390ae692873c847d1c2f62c0James Dong 20721cc31e629e8132df390ae692873c847d1c2f62c0James Dong /* horizontal first */ 20731cc31e629e8132df390ae692873c847d1c2f62c0James Dong for (i = 0; i < blkheight; i++) 20741cc31e629e8132df390ae692873c847d1c2f62c0James Dong { 20751cc31e629e8132df390ae692873c847d1c2f62c0James Dong temp = pRef[1]; 20761cc31e629e8132df390ae692873c847d1c2f62c0James Dong temp0 = ((pRef[0] << 3) + dx * (temp - pRef[0]) + 4) >> 3; 20771cc31e629e8132df390ae692873c847d1c2f62c0James Dong temp1 = ((temp << 3) + dx * (pRef[2] - temp) + 4) >> 3; 20781cc31e629e8132df390ae692873c847d1c2f62c0James Dong 20791cc31e629e8132df390ae692873c847d1c2f62c0James Dong *(int16 *)pOut = temp0 | (temp1 << 8); 20801cc31e629e8132df390ae692873c847d1c2f62c0James Dong pRef += srcPitch; 20811cc31e629e8132df390ae692873c847d1c2f62c0James Dong pOut += predPitch; 20821cc31e629e8132df390ae692873c847d1c2f62c0James Dong 20831cc31e629e8132df390ae692873c847d1c2f62c0James Dong } 20841cc31e629e8132df390ae692873c847d1c2f62c0James Dong return; 20851cc31e629e8132df390ae692873c847d1c2f62c0James Dong} 20861cc31e629e8132df390ae692873c847d1c2f62c0James Dongvoid eChromaVerticalMC2_SIMD(uint8 *pRef, int srcPitch, int dx, int dy, 20871cc31e629e8132df390ae692873c847d1c2f62c0James Dong uint8 *pOut, int predPitch, int blkwidth, int blkheight) 20881cc31e629e8132df390ae692873c847d1c2f62c0James Dong{ 20891cc31e629e8132df390ae692873c847d1c2f62c0James Dong (void)(dx); 20901cc31e629e8132df390ae692873c847d1c2f62c0James Dong (void)(blkwidth); 20911cc31e629e8132df390ae692873c847d1c2f62c0James Dong 20921cc31e629e8132df390ae692873c847d1c2f62c0James Dong int32 r0, r1, result; 20931cc31e629e8132df390ae692873c847d1c2f62c0James Dong int i; 20941cc31e629e8132df390ae692873c847d1c2f62c0James Dong int dy_8 = 8 - dy; 20951cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 = pRef[0] | (pRef[1] << 16); 
20961cc31e629e8132df390ae692873c847d1c2f62c0James Dong pRef += srcPitch; 20971cc31e629e8132df390ae692873c847d1c2f62c0James Dong for (i = 0; i < blkheight; i++) 20981cc31e629e8132df390ae692873c847d1c2f62c0James Dong { 20991cc31e629e8132df390ae692873c847d1c2f62c0James Dong result = dy_8 * r0 + 0x00040004; 21001cc31e629e8132df390ae692873c847d1c2f62c0James Dong r1 = pRef[0] | (pRef[1] << 16); 21011cc31e629e8132df390ae692873c847d1c2f62c0James Dong result += dy * r1; 21021cc31e629e8132df390ae692873c847d1c2f62c0James Dong result >>= 3; 21031cc31e629e8132df390ae692873c847d1c2f62c0James Dong result &= 0x00FF00FF; 21041cc31e629e8132df390ae692873c847d1c2f62c0James Dong *(int16 *)pOut = (result >> 8) | (result & 0xFF); 21051cc31e629e8132df390ae692873c847d1c2f62c0James Dong r0 = r1; 21061cc31e629e8132df390ae692873c847d1c2f62c0James Dong pRef += srcPitch; 21071cc31e629e8132df390ae692873c847d1c2f62c0James Dong pOut += predPitch; 21081cc31e629e8132df390ae692873c847d1c2f62c0James Dong } 21091cc31e629e8132df390ae692873c847d1c2f62c0James Dong return; 21101cc31e629e8132df390ae692873c847d1c2f62c0James Dong} 21111cc31e629e8132df390ae692873c847d1c2f62c0James Dong 21121cc31e629e8132df390ae692873c847d1c2f62c0James Dongvoid eChromaFullMC_SIMD(uint8 *pRef, int srcPitch, int dx, int dy, 21131cc31e629e8132df390ae692873c847d1c2f62c0James Dong uint8 *pOut, int predPitch, int blkwidth, int blkheight) 21141cc31e629e8132df390ae692873c847d1c2f62c0James Dong{ 21151cc31e629e8132df390ae692873c847d1c2f62c0James Dong (void)(dx); 21161cc31e629e8132df390ae692873c847d1c2f62c0James Dong (void)(dy); 21171cc31e629e8132df390ae692873c847d1c2f62c0James Dong 21181cc31e629e8132df390ae692873c847d1c2f62c0James Dong int i, j; 21191cc31e629e8132df390ae692873c847d1c2f62c0James Dong int offset_in = srcPitch - blkwidth; 21201cc31e629e8132df390ae692873c847d1c2f62c0James Dong int offset_out = predPitch - blkwidth; 21211cc31e629e8132df390ae692873c847d1c2f62c0James Dong uint16 temp; 
21221cc31e629e8132df390ae692873c847d1c2f62c0James Dong uint8 byte; 21231cc31e629e8132df390ae692873c847d1c2f62c0James Dong 21241cc31e629e8132df390ae692873c847d1c2f62c0James Dong if (((uint32)pRef)&1) 21251cc31e629e8132df390ae692873c847d1c2f62c0James Dong { 21261cc31e629e8132df390ae692873c847d1c2f62c0James Dong for (j = blkheight; j > 0; j--) 21271cc31e629e8132df390ae692873c847d1c2f62c0James Dong { 21281cc31e629e8132df390ae692873c847d1c2f62c0James Dong for (i = blkwidth; i > 0; i -= 2) 21291cc31e629e8132df390ae692873c847d1c2f62c0James Dong { 21301cc31e629e8132df390ae692873c847d1c2f62c0James Dong temp = *pRef++; 21311cc31e629e8132df390ae692873c847d1c2f62c0James Dong byte = *pRef++; 21321cc31e629e8132df390ae692873c847d1c2f62c0James Dong temp |= (byte << 8); 21331cc31e629e8132df390ae692873c847d1c2f62c0James Dong *((uint16*)pOut) = temp; /* write 2 bytes */ 21341cc31e629e8132df390ae692873c847d1c2f62c0James Dong pOut += 2; 21351cc31e629e8132df390ae692873c847d1c2f62c0James Dong } 21361cc31e629e8132df390ae692873c847d1c2f62c0James Dong pOut += offset_out; 21371cc31e629e8132df390ae692873c847d1c2f62c0James Dong pRef += offset_in; 21381cc31e629e8132df390ae692873c847d1c2f62c0James Dong } 21391cc31e629e8132df390ae692873c847d1c2f62c0James Dong } 21401cc31e629e8132df390ae692873c847d1c2f62c0James Dong else 21411cc31e629e8132df390ae692873c847d1c2f62c0James Dong { 21421cc31e629e8132df390ae692873c847d1c2f62c0James Dong for (j = blkheight; j > 0; j--) 21431cc31e629e8132df390ae692873c847d1c2f62c0James Dong { 21441cc31e629e8132df390ae692873c847d1c2f62c0James Dong for (i = blkwidth; i > 0; i -= 2) 21451cc31e629e8132df390ae692873c847d1c2f62c0James Dong { 21461cc31e629e8132df390ae692873c847d1c2f62c0James Dong temp = *((uint16*)pRef); 21471cc31e629e8132df390ae692873c847d1c2f62c0James Dong *((uint16*)pOut) = temp; 21481cc31e629e8132df390ae692873c847d1c2f62c0James Dong pRef += 2; 21491cc31e629e8132df390ae692873c847d1c2f62c0James Dong pOut += 2; 21501cc31e629e8132df390ae692873c847d1c2f62c0James 
Dong } 21511cc31e629e8132df390ae692873c847d1c2f62c0James Dong pOut += offset_out; 21521cc31e629e8132df390ae692873c847d1c2f62c0James Dong pRef += offset_in; 21531cc31e629e8132df390ae692873c847d1c2f62c0James Dong } 21541cc31e629e8132df390ae692873c847d1c2f62c0James Dong } 21551cc31e629e8132df390ae692873c847d1c2f62c0James Dong return ; 21561cc31e629e8132df390ae692873c847d1c2f62c0James Dong} 2157