129a84457aed4c45bc900998b5e11c03023264208James Dong/* ------------------------------------------------------------------ 229a84457aed4c45bc900998b5e11c03023264208James Dong * Copyright (C) 1998-2009 PacketVideo 329a84457aed4c45bc900998b5e11c03023264208James Dong * 429a84457aed4c45bc900998b5e11c03023264208James Dong * Licensed under the Apache License, Version 2.0 (the "License"); 529a84457aed4c45bc900998b5e11c03023264208James Dong * you may not use this file except in compliance with the License. 629a84457aed4c45bc900998b5e11c03023264208James Dong * You may obtain a copy of the License at 729a84457aed4c45bc900998b5e11c03023264208James Dong * 829a84457aed4c45bc900998b5e11c03023264208James Dong * http://www.apache.org/licenses/LICENSE-2.0 929a84457aed4c45bc900998b5e11c03023264208James Dong * 1029a84457aed4c45bc900998b5e11c03023264208James Dong * Unless required by applicable law or agreed to in writing, software 1129a84457aed4c45bc900998b5e11c03023264208James Dong * distributed under the License is distributed on an "AS IS" BASIS, 1229a84457aed4c45bc900998b5e11c03023264208James Dong * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 1329a84457aed4c45bc900998b5e11c03023264208James Dong * express or implied. 1429a84457aed4c45bc900998b5e11c03023264208James Dong * See the License for the specific language governing permissions 1529a84457aed4c45bc900998b5e11c03023264208James Dong * and limitations under the License. 
1629a84457aed4c45bc900998b5e11c03023264208James Dong * ------------------------------------------------------------------- 1729a84457aed4c45bc900998b5e11c03023264208James Dong */ 1829a84457aed4c45bc900998b5e11c03023264208James Dong#include "avcenc_lib.h" 1929a84457aed4c45bc900998b5e11c03023264208James Dong#include "avcenc_int.h" 2029a84457aed4c45bc900998b5e11c03023264208James Dong 2129a84457aed4c45bc900998b5e11c03023264208James Dong 2229a84457aed4c45bc900998b5e11c03023264208James Dong#define CLIP_RESULT(x) if((uint)x > 0xFF){ \ 2329a84457aed4c45bc900998b5e11c03023264208James Dong x = 0xFF & (~(x>>31));} 2429a84457aed4c45bc900998b5e11c03023264208James Dong 2529a84457aed4c45bc900998b5e11c03023264208James Dong/* (blkwidth << 2) + (dy << 1) + dx */ 2629a84457aed4c45bc900998b5e11c03023264208James Dongstatic void (*const eChromaMC_SIMD[8])(uint8 *, int , int , int , uint8 *, int, int , int) = 2729a84457aed4c45bc900998b5e11c03023264208James Dong{ 2829a84457aed4c45bc900998b5e11c03023264208James Dong &eChromaFullMC_SIMD, 2929a84457aed4c45bc900998b5e11c03023264208James Dong &eChromaHorizontalMC_SIMD, 3029a84457aed4c45bc900998b5e11c03023264208James Dong &eChromaVerticalMC_SIMD, 3129a84457aed4c45bc900998b5e11c03023264208James Dong &eChromaDiagonalMC_SIMD, 3229a84457aed4c45bc900998b5e11c03023264208James Dong &eChromaFullMC_SIMD, 3329a84457aed4c45bc900998b5e11c03023264208James Dong &eChromaHorizontalMC2_SIMD, 3429a84457aed4c45bc900998b5e11c03023264208James Dong &eChromaVerticalMC2_SIMD, 3529a84457aed4c45bc900998b5e11c03023264208James Dong &eChromaDiagonalMC2_SIMD 3629a84457aed4c45bc900998b5e11c03023264208James Dong}; 3729a84457aed4c45bc900998b5e11c03023264208James Dong/* Perform motion prediction and compensation with residue if exist. 
*/ 3829a84457aed4c45bc900998b5e11c03023264208James Dongvoid AVCMBMotionComp(AVCEncObject *encvid, AVCCommonObj *video) 3929a84457aed4c45bc900998b5e11c03023264208James Dong{ 4029a84457aed4c45bc900998b5e11c03023264208James Dong (void)(encvid); 4129a84457aed4c45bc900998b5e11c03023264208James Dong 4229a84457aed4c45bc900998b5e11c03023264208James Dong AVCMacroblock *currMB = video->currMB; 4329a84457aed4c45bc900998b5e11c03023264208James Dong AVCPictureData *currPic = video->currPic; 4429a84457aed4c45bc900998b5e11c03023264208James Dong int mbPartIdx, subMbPartIdx; 4529a84457aed4c45bc900998b5e11c03023264208James Dong int ref_idx; 4629a84457aed4c45bc900998b5e11c03023264208James Dong int offset_MbPart_indx = 0; 4729a84457aed4c45bc900998b5e11c03023264208James Dong int16 *mv; 4829a84457aed4c45bc900998b5e11c03023264208James Dong uint32 x_pos, y_pos; 4929a84457aed4c45bc900998b5e11c03023264208James Dong uint8 *curL, *curCb, *curCr; 5029a84457aed4c45bc900998b5e11c03023264208James Dong uint8 *ref_l, *ref_Cb, *ref_Cr; 5129a84457aed4c45bc900998b5e11c03023264208James Dong uint8 *predBlock, *predCb, *predCr; 5229a84457aed4c45bc900998b5e11c03023264208James Dong int block_x, block_y, offset_x, offset_y, offsetP, offset; 5329a84457aed4c45bc900998b5e11c03023264208James Dong int x_position = (video->mb_x << 4); 5429a84457aed4c45bc900998b5e11c03023264208James Dong int y_position = (video->mb_y << 4); 5529a84457aed4c45bc900998b5e11c03023264208James Dong int MbHeight, MbWidth, mbPartIdx_X, mbPartIdx_Y, offset_indx; 5629a84457aed4c45bc900998b5e11c03023264208James Dong int picWidth = currPic->width; 5729a84457aed4c45bc900998b5e11c03023264208James Dong int picPitch = currPic->pitch; 5829a84457aed4c45bc900998b5e11c03023264208James Dong int picHeight = currPic->height; 5929a84457aed4c45bc900998b5e11c03023264208James Dong uint32 tmp_word; 6029a84457aed4c45bc900998b5e11c03023264208James Dong 6129a84457aed4c45bc900998b5e11c03023264208James Dong tmp_word = y_position * picPitch; 
6229a84457aed4c45bc900998b5e11c03023264208James Dong curL = currPic->Sl + tmp_word + x_position; 6329a84457aed4c45bc900998b5e11c03023264208James Dong offset = (tmp_word >> 2) + (x_position >> 1); 6429a84457aed4c45bc900998b5e11c03023264208James Dong curCb = currPic->Scb + offset; 6529a84457aed4c45bc900998b5e11c03023264208James Dong curCr = currPic->Scr + offset; 6629a84457aed4c45bc900998b5e11c03023264208James Dong 6729a84457aed4c45bc900998b5e11c03023264208James Dong predBlock = curL; 6829a84457aed4c45bc900998b5e11c03023264208James Dong predCb = curCb; 6929a84457aed4c45bc900998b5e11c03023264208James Dong predCr = curCr; 7029a84457aed4c45bc900998b5e11c03023264208James Dong 7129a84457aed4c45bc900998b5e11c03023264208James Dong GetMotionVectorPredictor(video, 1); 7229a84457aed4c45bc900998b5e11c03023264208James Dong 7329a84457aed4c45bc900998b5e11c03023264208James Dong for (mbPartIdx = 0; mbPartIdx < currMB->NumMbPart; mbPartIdx++) 7429a84457aed4c45bc900998b5e11c03023264208James Dong { 7529a84457aed4c45bc900998b5e11c03023264208James Dong MbHeight = currMB->SubMbPartHeight[mbPartIdx]; 7629a84457aed4c45bc900998b5e11c03023264208James Dong MbWidth = currMB->SubMbPartWidth[mbPartIdx]; 7729a84457aed4c45bc900998b5e11c03023264208James Dong mbPartIdx_X = ((mbPartIdx + offset_MbPart_indx) & 1); 7829a84457aed4c45bc900998b5e11c03023264208James Dong mbPartIdx_Y = (mbPartIdx + offset_MbPart_indx) >> 1; 7929a84457aed4c45bc900998b5e11c03023264208James Dong ref_idx = currMB->ref_idx_L0[(mbPartIdx_Y << 1) + mbPartIdx_X]; 8029a84457aed4c45bc900998b5e11c03023264208James Dong offset_indx = 0; 8129a84457aed4c45bc900998b5e11c03023264208James Dong 8229a84457aed4c45bc900998b5e11c03023264208James Dong ref_l = video->RefPicList0[ref_idx]->Sl; 8329a84457aed4c45bc900998b5e11c03023264208James Dong ref_Cb = video->RefPicList0[ref_idx]->Scb; 8429a84457aed4c45bc900998b5e11c03023264208James Dong ref_Cr = video->RefPicList0[ref_idx]->Scr; 8529a84457aed4c45bc900998b5e11c03023264208James Dong 
8629a84457aed4c45bc900998b5e11c03023264208James Dong for (subMbPartIdx = 0; subMbPartIdx < currMB->NumSubMbPart[mbPartIdx]; subMbPartIdx++) 8729a84457aed4c45bc900998b5e11c03023264208James Dong { 8829a84457aed4c45bc900998b5e11c03023264208James Dong block_x = (mbPartIdx_X << 1) + ((subMbPartIdx + offset_indx) & 1); 8929a84457aed4c45bc900998b5e11c03023264208James Dong block_y = (mbPartIdx_Y << 1) + (((subMbPartIdx + offset_indx) >> 1) & 1); 9029a84457aed4c45bc900998b5e11c03023264208James Dong mv = (int16*)(currMB->mvL0 + block_x + (block_y << 2)); 9129a84457aed4c45bc900998b5e11c03023264208James Dong offset_x = x_position + (block_x << 2); 9229a84457aed4c45bc900998b5e11c03023264208James Dong offset_y = y_position + (block_y << 2); 9329a84457aed4c45bc900998b5e11c03023264208James Dong x_pos = (offset_x << 2) + *mv++; /*quarter pel */ 9429a84457aed4c45bc900998b5e11c03023264208James Dong y_pos = (offset_y << 2) + *mv; /*quarter pel */ 9529a84457aed4c45bc900998b5e11c03023264208James Dong 9629a84457aed4c45bc900998b5e11c03023264208James Dong //offset = offset_y * currPic->width; 9729a84457aed4c45bc900998b5e11c03023264208James Dong //offsetC = (offset >> 2) + (offset_x >> 1); 9829a84457aed4c45bc900998b5e11c03023264208James Dong offsetP = (block_y << 2) * picPitch + (block_x << 2); 9929a84457aed4c45bc900998b5e11c03023264208James Dong eLumaMotionComp(ref_l, picPitch, picHeight, x_pos, y_pos, 10029a84457aed4c45bc900998b5e11c03023264208James Dong /*comp_Sl + offset + offset_x,*/ 10129a84457aed4c45bc900998b5e11c03023264208James Dong predBlock + offsetP, picPitch, MbWidth, MbHeight); 10229a84457aed4c45bc900998b5e11c03023264208James Dong 10329a84457aed4c45bc900998b5e11c03023264208James Dong offsetP = (block_y * picWidth) + (block_x << 1); 10429a84457aed4c45bc900998b5e11c03023264208James Dong eChromaMotionComp(ref_Cb, picWidth >> 1, picHeight >> 1, x_pos, y_pos, 10529a84457aed4c45bc900998b5e11c03023264208James Dong /*comp_Scb + offsetC,*/ 
10629a84457aed4c45bc900998b5e11c03023264208James Dong predCb + offsetP, picPitch >> 1, MbWidth >> 1, MbHeight >> 1); 10729a84457aed4c45bc900998b5e11c03023264208James Dong eChromaMotionComp(ref_Cr, picWidth >> 1, picHeight >> 1, x_pos, y_pos, 10829a84457aed4c45bc900998b5e11c03023264208James Dong /*comp_Scr + offsetC,*/ 10929a84457aed4c45bc900998b5e11c03023264208James Dong predCr + offsetP, picPitch >> 1, MbWidth >> 1, MbHeight >> 1); 11029a84457aed4c45bc900998b5e11c03023264208James Dong 11129a84457aed4c45bc900998b5e11c03023264208James Dong offset_indx = currMB->SubMbPartWidth[mbPartIdx] >> 3; 11229a84457aed4c45bc900998b5e11c03023264208James Dong } 11329a84457aed4c45bc900998b5e11c03023264208James Dong offset_MbPart_indx = currMB->MbPartWidth >> 4; 11429a84457aed4c45bc900998b5e11c03023264208James Dong } 11529a84457aed4c45bc900998b5e11c03023264208James Dong 11629a84457aed4c45bc900998b5e11c03023264208James Dong return ; 11729a84457aed4c45bc900998b5e11c03023264208James Dong} 11829a84457aed4c45bc900998b5e11c03023264208James Dong 11929a84457aed4c45bc900998b5e11c03023264208James Dong 12029a84457aed4c45bc900998b5e11c03023264208James Dong/* preform the actual motion comp here */ 12129a84457aed4c45bc900998b5e11c03023264208James Dongvoid eLumaMotionComp(uint8 *ref, int picpitch, int picheight, 12229a84457aed4c45bc900998b5e11c03023264208James Dong int x_pos, int y_pos, 12329a84457aed4c45bc900998b5e11c03023264208James Dong uint8 *pred, int pred_pitch, 12429a84457aed4c45bc900998b5e11c03023264208James Dong int blkwidth, int blkheight) 12529a84457aed4c45bc900998b5e11c03023264208James Dong{ 12629a84457aed4c45bc900998b5e11c03023264208James Dong (void)(picheight); 12729a84457aed4c45bc900998b5e11c03023264208James Dong 12829a84457aed4c45bc900998b5e11c03023264208James Dong int dx, dy; 12929a84457aed4c45bc900998b5e11c03023264208James Dong int temp2[21][21]; /* for intermediate results */ 13029a84457aed4c45bc900998b5e11c03023264208James Dong uint8 *ref2; 
13129a84457aed4c45bc900998b5e11c03023264208James Dong 13229a84457aed4c45bc900998b5e11c03023264208James Dong dx = x_pos & 3; 13329a84457aed4c45bc900998b5e11c03023264208James Dong dy = y_pos & 3; 13429a84457aed4c45bc900998b5e11c03023264208James Dong x_pos = x_pos >> 2; /* round it to full-pel resolution */ 13529a84457aed4c45bc900998b5e11c03023264208James Dong y_pos = y_pos >> 2; 13629a84457aed4c45bc900998b5e11c03023264208James Dong 13729a84457aed4c45bc900998b5e11c03023264208James Dong /* perform actual motion compensation */ 13829a84457aed4c45bc900998b5e11c03023264208James Dong if (dx == 0 && dy == 0) 13929a84457aed4c45bc900998b5e11c03023264208James Dong { /* fullpel position *//* G */ 14029a84457aed4c45bc900998b5e11c03023264208James Dong 14129a84457aed4c45bc900998b5e11c03023264208James Dong ref += y_pos * picpitch + x_pos; 14229a84457aed4c45bc900998b5e11c03023264208James Dong 14329a84457aed4c45bc900998b5e11c03023264208James Dong eFullPelMC(ref, picpitch, pred, pred_pitch, blkwidth, blkheight); 14429a84457aed4c45bc900998b5e11c03023264208James Dong 14529a84457aed4c45bc900998b5e11c03023264208James Dong } /* other positions */ 14629a84457aed4c45bc900998b5e11c03023264208James Dong else if (dy == 0) 14729a84457aed4c45bc900998b5e11c03023264208James Dong { /* no vertical interpolation *//* a,b,c*/ 14829a84457aed4c45bc900998b5e11c03023264208James Dong 14929a84457aed4c45bc900998b5e11c03023264208James Dong ref += y_pos * picpitch + x_pos; 15029a84457aed4c45bc900998b5e11c03023264208James Dong 15129a84457aed4c45bc900998b5e11c03023264208James Dong eHorzInterp1MC(ref, picpitch, pred, pred_pitch, blkwidth, blkheight, dx); 15229a84457aed4c45bc900998b5e11c03023264208James Dong } 15329a84457aed4c45bc900998b5e11c03023264208James Dong else if (dx == 0) 15429a84457aed4c45bc900998b5e11c03023264208James Dong { /*no horizontal interpolation *//* d,h,n */ 15529a84457aed4c45bc900998b5e11c03023264208James Dong 15629a84457aed4c45bc900998b5e11c03023264208James Dong ref += y_pos * picpitch + 
x_pos; 15729a84457aed4c45bc900998b5e11c03023264208James Dong 15829a84457aed4c45bc900998b5e11c03023264208James Dong eVertInterp1MC(ref, picpitch, pred, pred_pitch, blkwidth, blkheight, dy); 15929a84457aed4c45bc900998b5e11c03023264208James Dong } 16029a84457aed4c45bc900998b5e11c03023264208James Dong else if (dy == 2) 16129a84457aed4c45bc900998b5e11c03023264208James Dong { /* horizontal cross *//* i, j, k */ 16229a84457aed4c45bc900998b5e11c03023264208James Dong 16329a84457aed4c45bc900998b5e11c03023264208James Dong ref += y_pos * picpitch + x_pos - 2; /* move to the left 2 pixels */ 16429a84457aed4c45bc900998b5e11c03023264208James Dong 16529a84457aed4c45bc900998b5e11c03023264208James Dong eVertInterp2MC(ref, picpitch, &temp2[0][0], 21, blkwidth + 5, blkheight); 16629a84457aed4c45bc900998b5e11c03023264208James Dong 16729a84457aed4c45bc900998b5e11c03023264208James Dong eHorzInterp2MC(&temp2[0][2], 21, pred, pred_pitch, blkwidth, blkheight, dx); 16829a84457aed4c45bc900998b5e11c03023264208James Dong } 16929a84457aed4c45bc900998b5e11c03023264208James Dong else if (dx == 2) 17029a84457aed4c45bc900998b5e11c03023264208James Dong { /* vertical cross */ /* f,q */ 17129a84457aed4c45bc900998b5e11c03023264208James Dong 17229a84457aed4c45bc900998b5e11c03023264208James Dong ref += (y_pos - 2) * picpitch + x_pos; /* move to up 2 lines */ 17329a84457aed4c45bc900998b5e11c03023264208James Dong 17429a84457aed4c45bc900998b5e11c03023264208James Dong eHorzInterp3MC(ref, picpitch, &temp2[0][0], 21, blkwidth, blkheight + 5); 17529a84457aed4c45bc900998b5e11c03023264208James Dong eVertInterp3MC(&temp2[2][0], 21, pred, pred_pitch, blkwidth, blkheight, dy); 17629a84457aed4c45bc900998b5e11c03023264208James Dong } 17729a84457aed4c45bc900998b5e11c03023264208James Dong else 17829a84457aed4c45bc900998b5e11c03023264208James Dong { /* diagonal *//* e,g,p,r */ 17929a84457aed4c45bc900998b5e11c03023264208James Dong 18029a84457aed4c45bc900998b5e11c03023264208James Dong ref2 = ref + (y_pos + (dy / 2)) * 
picpitch + x_pos; 18129a84457aed4c45bc900998b5e11c03023264208James Dong 18229a84457aed4c45bc900998b5e11c03023264208James Dong ref += (y_pos * picpitch) + x_pos + (dx / 2); 18329a84457aed4c45bc900998b5e11c03023264208James Dong 18429a84457aed4c45bc900998b5e11c03023264208James Dong eDiagonalInterpMC(ref2, ref, picpitch, pred, pred_pitch, blkwidth, blkheight); 18529a84457aed4c45bc900998b5e11c03023264208James Dong } 18629a84457aed4c45bc900998b5e11c03023264208James Dong 18729a84457aed4c45bc900998b5e11c03023264208James Dong return ; 18829a84457aed4c45bc900998b5e11c03023264208James Dong} 18929a84457aed4c45bc900998b5e11c03023264208James Dong 19029a84457aed4c45bc900998b5e11c03023264208James Dongvoid eCreateAlign(uint8 *ref, int picpitch, int y_pos, 19129a84457aed4c45bc900998b5e11c03023264208James Dong uint8 *out, int blkwidth, int blkheight) 19229a84457aed4c45bc900998b5e11c03023264208James Dong{ 19329a84457aed4c45bc900998b5e11c03023264208James Dong int i, j; 19429a84457aed4c45bc900998b5e11c03023264208James Dong int offset, out_offset; 19529a84457aed4c45bc900998b5e11c03023264208James Dong uint32 prev_pix, result, pix1, pix2, pix4; 19629a84457aed4c45bc900998b5e11c03023264208James Dong 19729a84457aed4c45bc900998b5e11c03023264208James Dong ref += y_pos * picpitch;// + x_pos; 19829a84457aed4c45bc900998b5e11c03023264208James Dong out_offset = 24 - blkwidth; 19929a84457aed4c45bc900998b5e11c03023264208James Dong 20029a84457aed4c45bc900998b5e11c03023264208James Dong //switch(x_pos&0x3){ 2014b43b41eaf8c4c80f66185e13620cf94b8b2ef5bMartin Storsjo switch (((intptr_t)ref)&0x3) 20229a84457aed4c45bc900998b5e11c03023264208James Dong { 20329a84457aed4c45bc900998b5e11c03023264208James Dong case 1: 20429a84457aed4c45bc900998b5e11c03023264208James Dong offset = picpitch - blkwidth - 3; 20529a84457aed4c45bc900998b5e11c03023264208James Dong for (j = 0; j < blkheight; j++) 20629a84457aed4c45bc900998b5e11c03023264208James Dong { 20729a84457aed4c45bc900998b5e11c03023264208James Dong pix1 = *ref++; 
20829a84457aed4c45bc900998b5e11c03023264208James Dong pix2 = *((uint16*)ref); 20929a84457aed4c45bc900998b5e11c03023264208James Dong ref += 2; 21029a84457aed4c45bc900998b5e11c03023264208James Dong result = (pix2 << 8) | pix1; 21129a84457aed4c45bc900998b5e11c03023264208James Dong 21229a84457aed4c45bc900998b5e11c03023264208James Dong for (i = 3; i < blkwidth; i += 4) 21329a84457aed4c45bc900998b5e11c03023264208James Dong { 21429a84457aed4c45bc900998b5e11c03023264208James Dong pix4 = *((uint32*)ref); 21529a84457aed4c45bc900998b5e11c03023264208James Dong ref += 4; 21629a84457aed4c45bc900998b5e11c03023264208James Dong prev_pix = (pix4 << 24) & 0xFF000000; /* mask out byte belong to previous word */ 21729a84457aed4c45bc900998b5e11c03023264208James Dong result |= prev_pix; 21829a84457aed4c45bc900998b5e11c03023264208James Dong *((uint32*)out) = result; /* write 4 bytes */ 21929a84457aed4c45bc900998b5e11c03023264208James Dong out += 4; 22029a84457aed4c45bc900998b5e11c03023264208James Dong result = pix4 >> 8; /* for the next loop */ 22129a84457aed4c45bc900998b5e11c03023264208James Dong } 22229a84457aed4c45bc900998b5e11c03023264208James Dong ref += offset; 22329a84457aed4c45bc900998b5e11c03023264208James Dong out += out_offset; 22429a84457aed4c45bc900998b5e11c03023264208James Dong } 22529a84457aed4c45bc900998b5e11c03023264208James Dong break; 22629a84457aed4c45bc900998b5e11c03023264208James Dong case 2: 22729a84457aed4c45bc900998b5e11c03023264208James Dong offset = picpitch - blkwidth - 2; 22829a84457aed4c45bc900998b5e11c03023264208James Dong for (j = 0; j < blkheight; j++) 22929a84457aed4c45bc900998b5e11c03023264208James Dong { 23029a84457aed4c45bc900998b5e11c03023264208James Dong result = *((uint16*)ref); 23129a84457aed4c45bc900998b5e11c03023264208James Dong ref += 2; 23229a84457aed4c45bc900998b5e11c03023264208James Dong for (i = 2; i < blkwidth; i += 4) 23329a84457aed4c45bc900998b5e11c03023264208James Dong { 23429a84457aed4c45bc900998b5e11c03023264208James Dong pix4 = 
*((uint32*)ref); 23529a84457aed4c45bc900998b5e11c03023264208James Dong ref += 4; 23629a84457aed4c45bc900998b5e11c03023264208James Dong prev_pix = (pix4 << 16) & 0xFFFF0000; /* mask out byte belong to previous word */ 23729a84457aed4c45bc900998b5e11c03023264208James Dong result |= prev_pix; 23829a84457aed4c45bc900998b5e11c03023264208James Dong *((uint32*)out) = result; /* write 4 bytes */ 23929a84457aed4c45bc900998b5e11c03023264208James Dong out += 4; 24029a84457aed4c45bc900998b5e11c03023264208James Dong result = pix4 >> 16; /* for the next loop */ 24129a84457aed4c45bc900998b5e11c03023264208James Dong } 24229a84457aed4c45bc900998b5e11c03023264208James Dong ref += offset; 24329a84457aed4c45bc900998b5e11c03023264208James Dong out += out_offset; 24429a84457aed4c45bc900998b5e11c03023264208James Dong } 24529a84457aed4c45bc900998b5e11c03023264208James Dong break; 24629a84457aed4c45bc900998b5e11c03023264208James Dong case 3: 24729a84457aed4c45bc900998b5e11c03023264208James Dong offset = picpitch - blkwidth - 1; 24829a84457aed4c45bc900998b5e11c03023264208James Dong for (j = 0; j < blkheight; j++) 24929a84457aed4c45bc900998b5e11c03023264208James Dong { 25029a84457aed4c45bc900998b5e11c03023264208James Dong result = *ref++; 25129a84457aed4c45bc900998b5e11c03023264208James Dong for (i = 1; i < blkwidth; i += 4) 25229a84457aed4c45bc900998b5e11c03023264208James Dong { 25329a84457aed4c45bc900998b5e11c03023264208James Dong pix4 = *((uint32*)ref); 25429a84457aed4c45bc900998b5e11c03023264208James Dong ref += 4; 25529a84457aed4c45bc900998b5e11c03023264208James Dong prev_pix = (pix4 << 8) & 0xFFFFFF00; /* mask out byte belong to previous word */ 25629a84457aed4c45bc900998b5e11c03023264208James Dong result |= prev_pix; 25729a84457aed4c45bc900998b5e11c03023264208James Dong *((uint32*)out) = result; /* write 4 bytes */ 25829a84457aed4c45bc900998b5e11c03023264208James Dong out += 4; 25929a84457aed4c45bc900998b5e11c03023264208James Dong result = pix4 >> 24; /* for the next loop */ 
26029a84457aed4c45bc900998b5e11c03023264208James Dong } 26129a84457aed4c45bc900998b5e11c03023264208James Dong ref += offset; 26229a84457aed4c45bc900998b5e11c03023264208James Dong out += out_offset; 26329a84457aed4c45bc900998b5e11c03023264208James Dong } 26429a84457aed4c45bc900998b5e11c03023264208James Dong break; 26529a84457aed4c45bc900998b5e11c03023264208James Dong } 26629a84457aed4c45bc900998b5e11c03023264208James Dong} 26729a84457aed4c45bc900998b5e11c03023264208James Dong 26829a84457aed4c45bc900998b5e11c03023264208James Dongvoid eHorzInterp1MC(uint8 *in, int inpitch, uint8 *out, int outpitch, 26929a84457aed4c45bc900998b5e11c03023264208James Dong int blkwidth, int blkheight, int dx) 27029a84457aed4c45bc900998b5e11c03023264208James Dong{ 2714e1d7b8d16abbe8a60fa3957646297b552e82fb0Martin Storsjo uint8 *p_ref, *tmp; 27229a84457aed4c45bc900998b5e11c03023264208James Dong uint32 *p_cur; 2734e1d7b8d16abbe8a60fa3957646297b552e82fb0Martin Storsjo uint32 pkres; 27429a84457aed4c45bc900998b5e11c03023264208James Dong int result, curr_offset, ref_offset; 27529a84457aed4c45bc900998b5e11c03023264208James Dong int j; 27629a84457aed4c45bc900998b5e11c03023264208James Dong int32 r0, r1, r2, r3, r4, r5; 27729a84457aed4c45bc900998b5e11c03023264208James Dong int32 r13, r6; 27829a84457aed4c45bc900998b5e11c03023264208James Dong 27929a84457aed4c45bc900998b5e11c03023264208James Dong p_cur = (uint32*)out; /* assume it's word aligned */ 28029a84457aed4c45bc900998b5e11c03023264208James Dong curr_offset = (outpitch - blkwidth) >> 2; 28129a84457aed4c45bc900998b5e11c03023264208James Dong p_ref = in; 28229a84457aed4c45bc900998b5e11c03023264208James Dong ref_offset = inpitch - blkwidth; 28329a84457aed4c45bc900998b5e11c03023264208James Dong 28429a84457aed4c45bc900998b5e11c03023264208James Dong if (dx&1) 28529a84457aed4c45bc900998b5e11c03023264208James Dong { 28629a84457aed4c45bc900998b5e11c03023264208James Dong dx = ((dx >> 1) ? 
-3 : -4); /* use in 3/4 pel */ 28729a84457aed4c45bc900998b5e11c03023264208James Dong p_ref -= 2; 28829a84457aed4c45bc900998b5e11c03023264208James Dong r13 = 0; 28929a84457aed4c45bc900998b5e11c03023264208James Dong for (j = blkheight; j > 0; j--) 29029a84457aed4c45bc900998b5e11c03023264208James Dong { 2914e1d7b8d16abbe8a60fa3957646297b552e82fb0Martin Storsjo tmp = p_ref + blkwidth; 29229a84457aed4c45bc900998b5e11c03023264208James Dong r0 = p_ref[0]; 29329a84457aed4c45bc900998b5e11c03023264208James Dong r1 = p_ref[2]; 29429a84457aed4c45bc900998b5e11c03023264208James Dong r0 |= (r1 << 16); /* 0,c,0,a */ 29529a84457aed4c45bc900998b5e11c03023264208James Dong r1 = p_ref[1]; 29629a84457aed4c45bc900998b5e11c03023264208James Dong r2 = p_ref[3]; 29729a84457aed4c45bc900998b5e11c03023264208James Dong r1 |= (r2 << 16); /* 0,d,0,b */ 2984e1d7b8d16abbe8a60fa3957646297b552e82fb0Martin Storsjo while (p_ref < tmp) 29929a84457aed4c45bc900998b5e11c03023264208James Dong { 30029a84457aed4c45bc900998b5e11c03023264208James Dong r2 = *(p_ref += 4); /* move pointer to e */ 30129a84457aed4c45bc900998b5e11c03023264208James Dong r3 = p_ref[2]; 30229a84457aed4c45bc900998b5e11c03023264208James Dong r2 |= (r3 << 16); /* 0,g,0,e */ 30329a84457aed4c45bc900998b5e11c03023264208James Dong r3 = p_ref[1]; 30429a84457aed4c45bc900998b5e11c03023264208James Dong r4 = p_ref[3]; 30529a84457aed4c45bc900998b5e11c03023264208James Dong r3 |= (r4 << 16); /* 0,h,0,f */ 30629a84457aed4c45bc900998b5e11c03023264208James Dong 30729a84457aed4c45bc900998b5e11c03023264208James Dong r4 = r0 + r3; /* c+h, a+f */ 30829a84457aed4c45bc900998b5e11c03023264208James Dong r5 = r0 + r1; /* c+d, a+b */ 30929a84457aed4c45bc900998b5e11c03023264208James Dong r6 = r2 + r3; /* g+h, e+f */ 31029a84457aed4c45bc900998b5e11c03023264208James Dong r5 >>= 16; 31129a84457aed4c45bc900998b5e11c03023264208James Dong r5 |= (r6 << 16); /* e+f, c+d */ 31229a84457aed4c45bc900998b5e11c03023264208James Dong r4 += r5 * 20; /* c+20*e+20*f+h, a+20*c+20*d+f 
*/ 31329a84457aed4c45bc900998b5e11c03023264208James Dong r4 += 0x100010; /* +16, +16 */ 31429a84457aed4c45bc900998b5e11c03023264208James Dong r5 = r1 + r2; /* d+g, b+e */ 31529a84457aed4c45bc900998b5e11c03023264208James Dong r4 -= r5 * 5; /* c-5*d+20*e+20*f-5*g+h, a-5*b+20*c+20*d-5*e+f */ 31629a84457aed4c45bc900998b5e11c03023264208James Dong r4 >>= 5; 31729a84457aed4c45bc900998b5e11c03023264208James Dong r13 |= r4; /* check clipping */ 31829a84457aed4c45bc900998b5e11c03023264208James Dong 31929a84457aed4c45bc900998b5e11c03023264208James Dong r5 = p_ref[dx+2]; 32029a84457aed4c45bc900998b5e11c03023264208James Dong r6 = p_ref[dx+4]; 32129a84457aed4c45bc900998b5e11c03023264208James Dong r5 |= (r6 << 16); 32229a84457aed4c45bc900998b5e11c03023264208James Dong r4 += r5; 32329a84457aed4c45bc900998b5e11c03023264208James Dong r4 += 0x10001; 32429a84457aed4c45bc900998b5e11c03023264208James Dong r4 = (r4 >> 1) & 0xFF00FF; 32529a84457aed4c45bc900998b5e11c03023264208James Dong 32629a84457aed4c45bc900998b5e11c03023264208James Dong r5 = p_ref[4]; /* i */ 32729a84457aed4c45bc900998b5e11c03023264208James Dong r6 = (r5 << 16); 32829a84457aed4c45bc900998b5e11c03023264208James Dong r5 = r6 | (r2 >> 16);/* 0,i,0,g */ 32929a84457aed4c45bc900998b5e11c03023264208James Dong r5 += r1; /* d+i, b+g */ /* r5 not free */ 33029a84457aed4c45bc900998b5e11c03023264208James Dong r1 >>= 16; 33129a84457aed4c45bc900998b5e11c03023264208James Dong r1 |= (r3 << 16); /* 0,f,0,d */ /* r1 has changed */ 33229a84457aed4c45bc900998b5e11c03023264208James Dong r1 += r2; /* f+g, d+e */ 33329a84457aed4c45bc900998b5e11c03023264208James Dong r5 += 20 * r1; /* d+20f+20g+i, b+20d+20e+g */ 33429a84457aed4c45bc900998b5e11c03023264208James Dong r0 >>= 16; 33529a84457aed4c45bc900998b5e11c03023264208James Dong r0 |= (r2 << 16); /* 0,e,0,c */ /* r0 has changed */ 33629a84457aed4c45bc900998b5e11c03023264208James Dong r0 += r3; /* e+h, c+f */ 33729a84457aed4c45bc900998b5e11c03023264208James Dong r5 += 0x100010; /* 16,16 */ 
33829a84457aed4c45bc900998b5e11c03023264208James Dong r5 -= r0 * 5; /* d-5e+20f+20g-5h+i, b-5c+20d+20e-5f+g */ 33929a84457aed4c45bc900998b5e11c03023264208James Dong r5 >>= 5; 34029a84457aed4c45bc900998b5e11c03023264208James Dong r13 |= r5; /* check clipping */ 34129a84457aed4c45bc900998b5e11c03023264208James Dong 34229a84457aed4c45bc900998b5e11c03023264208James Dong r0 = p_ref[dx+3]; 34329a84457aed4c45bc900998b5e11c03023264208James Dong r1 = p_ref[dx+5]; 34429a84457aed4c45bc900998b5e11c03023264208James Dong r0 |= (r1 << 16); 34529a84457aed4c45bc900998b5e11c03023264208James Dong r5 += r0; 34629a84457aed4c45bc900998b5e11c03023264208James Dong r5 += 0x10001; 34729a84457aed4c45bc900998b5e11c03023264208James Dong r5 = (r5 >> 1) & 0xFF00FF; 34829a84457aed4c45bc900998b5e11c03023264208James Dong 34929a84457aed4c45bc900998b5e11c03023264208James Dong r4 |= (r5 << 8); /* pack them together */ 35029a84457aed4c45bc900998b5e11c03023264208James Dong *p_cur++ = r4; 35129a84457aed4c45bc900998b5e11c03023264208James Dong r1 = r3; 35229a84457aed4c45bc900998b5e11c03023264208James Dong r0 = r2; 35329a84457aed4c45bc900998b5e11c03023264208James Dong } 35429a84457aed4c45bc900998b5e11c03023264208James Dong p_cur += curr_offset; /* move to the next line */ 35529a84457aed4c45bc900998b5e11c03023264208James Dong p_ref += ref_offset; /* ref_offset = inpitch-blkwidth; */ 35629a84457aed4c45bc900998b5e11c03023264208James Dong 35729a84457aed4c45bc900998b5e11c03023264208James Dong if (r13&0xFF000700) /* need clipping */ 35829a84457aed4c45bc900998b5e11c03023264208James Dong { 35929a84457aed4c45bc900998b5e11c03023264208James Dong /* move back to the beginning of the line */ 36029a84457aed4c45bc900998b5e11c03023264208James Dong p_ref -= (ref_offset + blkwidth); /* input */ 36129a84457aed4c45bc900998b5e11c03023264208James Dong p_cur -= (outpitch >> 2); 36229a84457aed4c45bc900998b5e11c03023264208James Dong 3634e1d7b8d16abbe8a60fa3957646297b552e82fb0Martin Storsjo tmp = p_ref + blkwidth; 
3644e1d7b8d16abbe8a60fa3957646297b552e82fb0Martin Storsjo for (; p_ref < tmp;) 36529a84457aed4c45bc900998b5e11c03023264208James Dong { 36629a84457aed4c45bc900998b5e11c03023264208James Dong 36729a84457aed4c45bc900998b5e11c03023264208James Dong r0 = *p_ref++; 36829a84457aed4c45bc900998b5e11c03023264208James Dong r1 = *p_ref++; 36929a84457aed4c45bc900998b5e11c03023264208James Dong r2 = *p_ref++; 37029a84457aed4c45bc900998b5e11c03023264208James Dong r3 = *p_ref++; 37129a84457aed4c45bc900998b5e11c03023264208James Dong r4 = *p_ref++; 37229a84457aed4c45bc900998b5e11c03023264208James Dong /* first pixel */ 37329a84457aed4c45bc900998b5e11c03023264208James Dong r5 = *p_ref++; 37429a84457aed4c45bc900998b5e11c03023264208James Dong result = (r0 + r5); 37529a84457aed4c45bc900998b5e11c03023264208James Dong r0 = (r1 + r4); 37629a84457aed4c45bc900998b5e11c03023264208James Dong result -= (r0 * 5);//result -= r0; result -= (r0<<2); 37729a84457aed4c45bc900998b5e11c03023264208James Dong r0 = (r2 + r3); 37829a84457aed4c45bc900998b5e11c03023264208James Dong result += (r0 * 20);//result += (r0<<4); result += (r0<<2); 37929a84457aed4c45bc900998b5e11c03023264208James Dong result = (result + 16) >> 5; 38029a84457aed4c45bc900998b5e11c03023264208James Dong CLIP_RESULT(result) 38129a84457aed4c45bc900998b5e11c03023264208James Dong /* 3/4 pel, no need to clip */ 38229a84457aed4c45bc900998b5e11c03023264208James Dong result = (result + p_ref[dx] + 1); 38329a84457aed4c45bc900998b5e11c03023264208James Dong pkres = (result >> 1) ; 38429a84457aed4c45bc900998b5e11c03023264208James Dong /* second pixel */ 38529a84457aed4c45bc900998b5e11c03023264208James Dong r0 = *p_ref++; 38629a84457aed4c45bc900998b5e11c03023264208James Dong result = (r1 + r0); 38729a84457aed4c45bc900998b5e11c03023264208James Dong r1 = (r2 + r5); 38829a84457aed4c45bc900998b5e11c03023264208James Dong result -= (r1 * 5);//result -= r1; result -= (r1<<2); 38929a84457aed4c45bc900998b5e11c03023264208James Dong r1 = (r3 + r4); 
39029a84457aed4c45bc900998b5e11c03023264208James Dong result += (r1 * 20);//result += (r1<<4); result += (r1<<2); 39129a84457aed4c45bc900998b5e11c03023264208James Dong result = (result + 16) >> 5; 39229a84457aed4c45bc900998b5e11c03023264208James Dong CLIP_RESULT(result) 39329a84457aed4c45bc900998b5e11c03023264208James Dong /* 3/4 pel, no need to clip */ 39429a84457aed4c45bc900998b5e11c03023264208James Dong result = (result + p_ref[dx] + 1); 39529a84457aed4c45bc900998b5e11c03023264208James Dong result = (result >> 1); 39629a84457aed4c45bc900998b5e11c03023264208James Dong pkres |= (result << 8); 39729a84457aed4c45bc900998b5e11c03023264208James Dong /* third pixel */ 39829a84457aed4c45bc900998b5e11c03023264208James Dong r1 = *p_ref++; 39929a84457aed4c45bc900998b5e11c03023264208James Dong result = (r2 + r1); 40029a84457aed4c45bc900998b5e11c03023264208James Dong r2 = (r3 + r0); 40129a84457aed4c45bc900998b5e11c03023264208James Dong result -= (r2 * 5);//result -= r2; result -= (r2<<2); 40229a84457aed4c45bc900998b5e11c03023264208James Dong r2 = (r4 + r5); 40329a84457aed4c45bc900998b5e11c03023264208James Dong result += (r2 * 20);//result += (r2<<4); result += (r2<<2); 40429a84457aed4c45bc900998b5e11c03023264208James Dong result = (result + 16) >> 5; 40529a84457aed4c45bc900998b5e11c03023264208James Dong CLIP_RESULT(result) 40629a84457aed4c45bc900998b5e11c03023264208James Dong /* 3/4 pel, no need to clip */ 40729a84457aed4c45bc900998b5e11c03023264208James Dong result = (result + p_ref[dx] + 1); 40829a84457aed4c45bc900998b5e11c03023264208James Dong result = (result >> 1); 40929a84457aed4c45bc900998b5e11c03023264208James Dong pkres |= (result << 16); 41029a84457aed4c45bc900998b5e11c03023264208James Dong /* fourth pixel */ 41129a84457aed4c45bc900998b5e11c03023264208James Dong r2 = *p_ref++; 41229a84457aed4c45bc900998b5e11c03023264208James Dong result = (r3 + r2); 41329a84457aed4c45bc900998b5e11c03023264208James Dong r3 = (r4 + r1); 
41429a84457aed4c45bc900998b5e11c03023264208James Dong result -= (r3 * 5);//result -= r3; result -= (r3<<2); 41529a84457aed4c45bc900998b5e11c03023264208James Dong r3 = (r5 + r0); 41629a84457aed4c45bc900998b5e11c03023264208James Dong result += (r3 * 20);//result += (r3<<4); result += (r3<<2); 41729a84457aed4c45bc900998b5e11c03023264208James Dong result = (result + 16) >> 5; 41829a84457aed4c45bc900998b5e11c03023264208James Dong CLIP_RESULT(result) 41929a84457aed4c45bc900998b5e11c03023264208James Dong /* 3/4 pel, no need to clip */ 42029a84457aed4c45bc900998b5e11c03023264208James Dong result = (result + p_ref[dx] + 1); 42129a84457aed4c45bc900998b5e11c03023264208James Dong result = (result >> 1); 42229a84457aed4c45bc900998b5e11c03023264208James Dong pkres |= (result << 24); 42329a84457aed4c45bc900998b5e11c03023264208James Dong *p_cur++ = pkres; /* write 4 pixels */ 42429a84457aed4c45bc900998b5e11c03023264208James Dong p_ref -= 5; /* offset back to the middle of filter */ 42529a84457aed4c45bc900998b5e11c03023264208James Dong } 42629a84457aed4c45bc900998b5e11c03023264208James Dong p_cur += curr_offset; /* move to the next line */ 42729a84457aed4c45bc900998b5e11c03023264208James Dong p_ref += ref_offset; /* move to the next line */ 42829a84457aed4c45bc900998b5e11c03023264208James Dong } 42929a84457aed4c45bc900998b5e11c03023264208James Dong } 43029a84457aed4c45bc900998b5e11c03023264208James Dong } 43129a84457aed4c45bc900998b5e11c03023264208James Dong else 43229a84457aed4c45bc900998b5e11c03023264208James Dong { 43329a84457aed4c45bc900998b5e11c03023264208James Dong p_ref -= 2; 43429a84457aed4c45bc900998b5e11c03023264208James Dong r13 = 0; 43529a84457aed4c45bc900998b5e11c03023264208James Dong for (j = blkheight; j > 0; j--) 43629a84457aed4c45bc900998b5e11c03023264208James Dong { 4374e1d7b8d16abbe8a60fa3957646297b552e82fb0Martin Storsjo tmp = p_ref + blkwidth; 43829a84457aed4c45bc900998b5e11c03023264208James Dong r0 = p_ref[0]; 43929a84457aed4c45bc900998b5e11c03023264208James 
Dong r1 = p_ref[2]; 44029a84457aed4c45bc900998b5e11c03023264208James Dong r0 |= (r1 << 16); /* 0,c,0,a */ 44129a84457aed4c45bc900998b5e11c03023264208James Dong r1 = p_ref[1]; 44229a84457aed4c45bc900998b5e11c03023264208James Dong r2 = p_ref[3]; 44329a84457aed4c45bc900998b5e11c03023264208James Dong r1 |= (r2 << 16); /* 0,d,0,b */ 4444e1d7b8d16abbe8a60fa3957646297b552e82fb0Martin Storsjo while (p_ref < tmp) 44529a84457aed4c45bc900998b5e11c03023264208James Dong { 44629a84457aed4c45bc900998b5e11c03023264208James Dong r2 = *(p_ref += 4); /* move pointer to e */ 44729a84457aed4c45bc900998b5e11c03023264208James Dong r3 = p_ref[2]; 44829a84457aed4c45bc900998b5e11c03023264208James Dong r2 |= (r3 << 16); /* 0,g,0,e */ 44929a84457aed4c45bc900998b5e11c03023264208James Dong r3 = p_ref[1]; 45029a84457aed4c45bc900998b5e11c03023264208James Dong r4 = p_ref[3]; 45129a84457aed4c45bc900998b5e11c03023264208James Dong r3 |= (r4 << 16); /* 0,h,0,f */ 45229a84457aed4c45bc900998b5e11c03023264208James Dong 45329a84457aed4c45bc900998b5e11c03023264208James Dong r4 = r0 + r3; /* c+h, a+f */ 45429a84457aed4c45bc900998b5e11c03023264208James Dong r5 = r0 + r1; /* c+d, a+b */ 45529a84457aed4c45bc900998b5e11c03023264208James Dong r6 = r2 + r3; /* g+h, e+f */ 45629a84457aed4c45bc900998b5e11c03023264208James Dong r5 >>= 16; 45729a84457aed4c45bc900998b5e11c03023264208James Dong r5 |= (r6 << 16); /* e+f, c+d */ 45829a84457aed4c45bc900998b5e11c03023264208James Dong r4 += r5 * 20; /* c+20*e+20*f+h, a+20*c+20*d+f */ 45929a84457aed4c45bc900998b5e11c03023264208James Dong r4 += 0x100010; /* +16, +16 */ 46029a84457aed4c45bc900998b5e11c03023264208James Dong r5 = r1 + r2; /* d+g, b+e */ 46129a84457aed4c45bc900998b5e11c03023264208James Dong r4 -= r5 * 5; /* c-5*d+20*e+20*f-5*g+h, a-5*b+20*c+20*d-5*e+f */ 46229a84457aed4c45bc900998b5e11c03023264208James Dong r4 >>= 5; 46329a84457aed4c45bc900998b5e11c03023264208James Dong r13 |= r4; /* check clipping */ 46429a84457aed4c45bc900998b5e11c03023264208James Dong r4 &= 
0xFF00FF; /* mask */ 46529a84457aed4c45bc900998b5e11c03023264208James Dong 46629a84457aed4c45bc900998b5e11c03023264208James Dong r5 = p_ref[4]; /* i */ 46729a84457aed4c45bc900998b5e11c03023264208James Dong r6 = (r5 << 16); 46829a84457aed4c45bc900998b5e11c03023264208James Dong r5 = r6 | (r2 >> 16);/* 0,i,0,g */ 46929a84457aed4c45bc900998b5e11c03023264208James Dong r5 += r1; /* d+i, b+g */ /* r5 not free */ 47029a84457aed4c45bc900998b5e11c03023264208James Dong r1 >>= 16; 47129a84457aed4c45bc900998b5e11c03023264208James Dong r1 |= (r3 << 16); /* 0,f,0,d */ /* r1 has changed */ 47229a84457aed4c45bc900998b5e11c03023264208James Dong r1 += r2; /* f+g, d+e */ 47329a84457aed4c45bc900998b5e11c03023264208James Dong r5 += 20 * r1; /* d+20f+20g+i, b+20d+20e+g */ 47429a84457aed4c45bc900998b5e11c03023264208James Dong r0 >>= 16; 47529a84457aed4c45bc900998b5e11c03023264208James Dong r0 |= (r2 << 16); /* 0,e,0,c */ /* r0 has changed */ 47629a84457aed4c45bc900998b5e11c03023264208James Dong r0 += r3; /* e+h, c+f */ 47729a84457aed4c45bc900998b5e11c03023264208James Dong r5 += 0x100010; /* 16,16 */ 47829a84457aed4c45bc900998b5e11c03023264208James Dong r5 -= r0 * 5; /* d-5e+20f+20g-5h+i, b-5c+20d+20e-5f+g */ 47929a84457aed4c45bc900998b5e11c03023264208James Dong r5 >>= 5; 48029a84457aed4c45bc900998b5e11c03023264208James Dong r13 |= r5; /* check clipping */ 48129a84457aed4c45bc900998b5e11c03023264208James Dong r5 &= 0xFF00FF; /* mask */ 48229a84457aed4c45bc900998b5e11c03023264208James Dong 48329a84457aed4c45bc900998b5e11c03023264208James Dong r4 |= (r5 << 8); /* pack them together */ 48429a84457aed4c45bc900998b5e11c03023264208James Dong *p_cur++ = r4; 48529a84457aed4c45bc900998b5e11c03023264208James Dong r1 = r3; 48629a84457aed4c45bc900998b5e11c03023264208James Dong r0 = r2; 48729a84457aed4c45bc900998b5e11c03023264208James Dong } 48829a84457aed4c45bc900998b5e11c03023264208James Dong p_cur += curr_offset; /* move to the next line */ 48929a84457aed4c45bc900998b5e11c03023264208James Dong p_ref 
+= ref_offset; /* ref_offset = inpitch-blkwidth; */ 49029a84457aed4c45bc900998b5e11c03023264208James Dong 49129a84457aed4c45bc900998b5e11c03023264208James Dong if (r13&0xFF000700) /* need clipping */ 49229a84457aed4c45bc900998b5e11c03023264208James Dong { 49329a84457aed4c45bc900998b5e11c03023264208James Dong /* move back to the beginning of the line */ 49429a84457aed4c45bc900998b5e11c03023264208James Dong p_ref -= (ref_offset + blkwidth); /* input */ 49529a84457aed4c45bc900998b5e11c03023264208James Dong p_cur -= (outpitch >> 2); 49629a84457aed4c45bc900998b5e11c03023264208James Dong 4974e1d7b8d16abbe8a60fa3957646297b552e82fb0Martin Storsjo tmp = p_ref + blkwidth; 4984e1d7b8d16abbe8a60fa3957646297b552e82fb0Martin Storsjo for (; p_ref < tmp;) 49929a84457aed4c45bc900998b5e11c03023264208James Dong { 50029a84457aed4c45bc900998b5e11c03023264208James Dong 50129a84457aed4c45bc900998b5e11c03023264208James Dong r0 = *p_ref++; 50229a84457aed4c45bc900998b5e11c03023264208James Dong r1 = *p_ref++; 50329a84457aed4c45bc900998b5e11c03023264208James Dong r2 = *p_ref++; 50429a84457aed4c45bc900998b5e11c03023264208James Dong r3 = *p_ref++; 50529a84457aed4c45bc900998b5e11c03023264208James Dong r4 = *p_ref++; 50629a84457aed4c45bc900998b5e11c03023264208James Dong /* first pixel */ 50729a84457aed4c45bc900998b5e11c03023264208James Dong r5 = *p_ref++; 50829a84457aed4c45bc900998b5e11c03023264208James Dong result = (r0 + r5); 50929a84457aed4c45bc900998b5e11c03023264208James Dong r0 = (r1 + r4); 51029a84457aed4c45bc900998b5e11c03023264208James Dong result -= (r0 * 5);//result -= r0; result -= (r0<<2); 51129a84457aed4c45bc900998b5e11c03023264208James Dong r0 = (r2 + r3); 51229a84457aed4c45bc900998b5e11c03023264208James Dong result += (r0 * 20);//result += (r0<<4); result += (r0<<2); 51329a84457aed4c45bc900998b5e11c03023264208James Dong result = (result + 16) >> 5; 51429a84457aed4c45bc900998b5e11c03023264208James Dong CLIP_RESULT(result) 51529a84457aed4c45bc900998b5e11c03023264208James Dong pkres 
= result; 51629a84457aed4c45bc900998b5e11c03023264208James Dong /* second pixel */ 51729a84457aed4c45bc900998b5e11c03023264208James Dong r0 = *p_ref++; 51829a84457aed4c45bc900998b5e11c03023264208James Dong result = (r1 + r0); 51929a84457aed4c45bc900998b5e11c03023264208James Dong r1 = (r2 + r5); 52029a84457aed4c45bc900998b5e11c03023264208James Dong result -= (r1 * 5);//result -= r1; result -= (r1<<2); 52129a84457aed4c45bc900998b5e11c03023264208James Dong r1 = (r3 + r4); 52229a84457aed4c45bc900998b5e11c03023264208James Dong result += (r1 * 20);//result += (r1<<4); result += (r1<<2); 52329a84457aed4c45bc900998b5e11c03023264208James Dong result = (result + 16) >> 5; 52429a84457aed4c45bc900998b5e11c03023264208James Dong CLIP_RESULT(result) 52529a84457aed4c45bc900998b5e11c03023264208James Dong pkres |= (result << 8); 52629a84457aed4c45bc900998b5e11c03023264208James Dong /* third pixel */ 52729a84457aed4c45bc900998b5e11c03023264208James Dong r1 = *p_ref++; 52829a84457aed4c45bc900998b5e11c03023264208James Dong result = (r2 + r1); 52929a84457aed4c45bc900998b5e11c03023264208James Dong r2 = (r3 + r0); 53029a84457aed4c45bc900998b5e11c03023264208James Dong result -= (r2 * 5);//result -= r2; result -= (r2<<2); 53129a84457aed4c45bc900998b5e11c03023264208James Dong r2 = (r4 + r5); 53229a84457aed4c45bc900998b5e11c03023264208James Dong result += (r2 * 20);//result += (r2<<4); result += (r2<<2); 53329a84457aed4c45bc900998b5e11c03023264208James Dong result = (result + 16) >> 5; 53429a84457aed4c45bc900998b5e11c03023264208James Dong CLIP_RESULT(result) 53529a84457aed4c45bc900998b5e11c03023264208James Dong pkres |= (result << 16); 53629a84457aed4c45bc900998b5e11c03023264208James Dong /* fourth pixel */ 53729a84457aed4c45bc900998b5e11c03023264208James Dong r2 = *p_ref++; 53829a84457aed4c45bc900998b5e11c03023264208James Dong result = (r3 + r2); 53929a84457aed4c45bc900998b5e11c03023264208James Dong r3 = (r4 + r1); 54029a84457aed4c45bc900998b5e11c03023264208James Dong result -= (r3 * 
/* Unscaled 6-tap FIR with kernel (1, -5, 20, 20, -5, 1) applied to p[-2..3]. */
static int eHorzInterp2TapSum(const int *p)
{
    int sum = p[-2] + p[3];

    sum -= 5 * (p[-1] + p[2]);
    sum += 20 * (p[0] + p[1]);
    return sum;
}

/* Saturate v to the 8-bit pixel range [0, 255]; yields the same values as
 * CLIP_RESULT without depending on a right shift of a negative int. */
static int eHorzInterp2Clip(int v)
{
    if (v < 0)
    {
        return 0;
    }
    if (v > 0xFF)
    {
        return 0xFF;
    }
    return v;
}

/*
 * Horizontal 6-tap interpolation of an int sample plane into 8-bit pixels.
 *
 *  in        first sample of the block; in[-2] .. in[blkwidth + 2] are read on
 *            every row, so the caller must provide that margin.  The rounding
 *            used here (+512 >> 10 after filtering, +16 >> 5 for a single
 *            sample) implies the samples carry 5 extra fractional bits --
 *            presumably the unrounded output of a vertical pass; confirm
 *            against the caller.
 *  inpitch   distance between input rows, in int elements.
 *  out       destination pixels; no alignment is required.
 *  outpitch  distance between output rows, in bytes.
 *  blkwidth  block width in pixels.  Pixels are produced in groups of four, so
 *            a width that is not a multiple of 4 is rounded up (and the row
 *            pointers drift accordingly, exactly as before).
 *  blkheight number of rows.
 *  dx        even: output is the filtered value only.
 *            odd : the filtered value is averaged (rounding up) with the
 *                  rescaled input sample at the same position when
 *                  (dx >> 1) == 0, or at the next position otherwise.
 *
 * Pixels are stored one byte at a time in left-to-right order.  The previous
 * code packed four pixels into a 32-bit word, which shifted a signed int into
 * the sign bit (undefined behaviour), required a word-aligned destination and
 * produced the intended byte order on little-endian targets only.
 */
void eHorzInterp2MC(int *in, int inpitch, uint8 *out, int outpitch,
                    int blkwidth, int blkheight, int dx)
{
    const int *p_ref = in;
    uint8 *p_cur = out;
    /* The row gap stays word-granular so the output layout matches the
     * word-based stores this routine used to perform. */
    const int curr_offset = ((outpitch - blkwidth) >> 2) * 4;
    const int ref_offset = inpitch - blkwidth;
    const int quarter_pel = dx & 1;
    const int full_idx = (dx >> 1) ? 1 : 0; /* which neighbour to average with */
    int j, k;

    for (j = blkheight; j > 0; j--)
    {
        const int *row_end = p_ref + blkwidth;

        while (p_ref < row_end)
        {
            for (k = 0; k < 4; k++)
            {
                int pel = eHorzInterp2Clip((eHorzInterp2TapSum(p_ref + k) + 512) >> 10);

                if (quarter_pel)
                {
                    int full = eHorzInterp2Clip((p_ref[k + full_idx] + 16) >> 5);

                    /* both operands are already in [0, 255]; no clip needed */
                    pel = (pel + full + 1) >> 1;
                }
                p_cur[k] = (uint8)pel;
            }
            p_ref += 4;
            p_cur += 4;
        }
        p_cur += curr_offset; /* move to the next line */
        p_ref += ref_offset;  /* move to the next line */
    }

    return;
}
/*
 * Horizontal 6-tap filter, kernel (1, -5, 20, 20, -5, 1), from 8-bit pixels to
 * int sums.  The sums are stored as computed: no rounding, no scaling and no
 * clipping is applied here.
 *
 *  in        first pixel of the block; in[-2] .. in[blkwidth + 2] are read on
 *            every row.
 *  inpitch   distance between input rows, in pixels.
 *  out       destination for the filter sums.
 *  outpitch  distance between output rows, in int elements.
 *  blkwidth  block width; results are produced in groups of four per pass.
 *  blkheight number of rows.
 */
void eHorzInterp3MC(uint8 *in, int inpitch, int *out, int outpitch,
                    int blkwidth, int blkheight)
{
    uint8 *src = in;
    int *dst = out;
    const int dst_gap = outpitch - blkwidth; /* end of row -> start of next */
    const int src_gap = inpitch - blkwidth;
    int row, k;

    for (row = 0; row < blkheight; row++)
    {
        uint8 *row_end = src + blkwidth;

        while (src < row_end)
        {
            /* four neighbouring outputs per pass */
            for (k = 0; k < 4; k++)
            {
                const uint8 *c = src + k;
                int sum = c[-2] + c[3];

                sum -= 5 * (c[-1] + c[2]);
                sum += 20 * (c[0] + c[1]);
                dst[k] = sum;
            }
            src += 4;
            dst += 4;
        }
        dst += dst_gap; /* move to the next line */
        src += src_gap;
    }

    return;
}
79529a84457aed4c45bc900998b5e11c03023264208James Dong inpitch = 24; 79629a84457aed4c45bc900998b5e11c03023264208James Dong } 79729a84457aed4c45bc900998b5e11c03023264208James Dong p_cur = out; 79829a84457aed4c45bc900998b5e11c03023264208James Dong curr_offset = 1 - outpitch * (blkheight - 1); /* offset vertically back up and one pixel to right */ 79929a84457aed4c45bc900998b5e11c03023264208James Dong ref_offset = blkheight * inpitch; /* for limit */ 80029a84457aed4c45bc900998b5e11c03023264208James Dong 80129a84457aed4c45bc900998b5e11c03023264208James Dong curr_offset += 3; 80229a84457aed4c45bc900998b5e11c03023264208James Dong 80329a84457aed4c45bc900998b5e11c03023264208James Dong if (dy&1) 80429a84457aed4c45bc900998b5e11c03023264208James Dong { 80529a84457aed4c45bc900998b5e11c03023264208James Dong dy = (dy >> 1) ? 0 : -inpitch; 80629a84457aed4c45bc900998b5e11c03023264208James Dong 80729a84457aed4c45bc900998b5e11c03023264208James Dong for (j = 0; j < blkwidth; j += 4, in += 4) 80829a84457aed4c45bc900998b5e11c03023264208James Dong { 80929a84457aed4c45bc900998b5e11c03023264208James Dong r13 = 0; 81029a84457aed4c45bc900998b5e11c03023264208James Dong p_ref = in; 81129a84457aed4c45bc900998b5e11c03023264208James Dong p_cur -= outpitch; /* compensate for the first offset */ 8124e1d7b8d16abbe8a60fa3957646297b552e82fb0Martin Storsjo tmp = p_ref + ref_offset; /* limit */ 8134e1d7b8d16abbe8a60fa3957646297b552e82fb0Martin Storsjo while (p_ref < tmp) /* the loop un-rolled */ 81429a84457aed4c45bc900998b5e11c03023264208James Dong { 81529a84457aed4c45bc900998b5e11c03023264208James Dong r0 = *((uint32*)(p_ref - (inpitch << 1))); /* load 4 bytes */ 81629a84457aed4c45bc900998b5e11c03023264208James Dong p_ref += inpitch; 81729a84457aed4c45bc900998b5e11c03023264208James Dong r6 = (r0 >> 8) & 0xFF00FF; /* second and fourth byte */ 81829a84457aed4c45bc900998b5e11c03023264208James Dong r0 &= 0xFF00FF; 81929a84457aed4c45bc900998b5e11c03023264208James Dong 
82029a84457aed4c45bc900998b5e11c03023264208James Dong r1 = *((uint32*)(p_ref + (inpitch << 1))); /* r1, r7, ref[3] */ 82129a84457aed4c45bc900998b5e11c03023264208James Dong r7 = (r1 >> 8) & 0xFF00FF; 82229a84457aed4c45bc900998b5e11c03023264208James Dong r1 &= 0xFF00FF; 82329a84457aed4c45bc900998b5e11c03023264208James Dong 82429a84457aed4c45bc900998b5e11c03023264208James Dong r0 += r1; 82529a84457aed4c45bc900998b5e11c03023264208James Dong r6 += r7; 82629a84457aed4c45bc900998b5e11c03023264208James Dong 82729a84457aed4c45bc900998b5e11c03023264208James Dong r2 = *((uint32*)p_ref); /* r2, r8, ref[1] */ 82829a84457aed4c45bc900998b5e11c03023264208James Dong r8 = (r2 >> 8) & 0xFF00FF; 82929a84457aed4c45bc900998b5e11c03023264208James Dong r2 &= 0xFF00FF; 83029a84457aed4c45bc900998b5e11c03023264208James Dong 83129a84457aed4c45bc900998b5e11c03023264208James Dong r1 = *((uint32*)(p_ref - inpitch)); /* r1, r7, ref[0] */ 83229a84457aed4c45bc900998b5e11c03023264208James Dong r7 = (r1 >> 8) & 0xFF00FF; 83329a84457aed4c45bc900998b5e11c03023264208James Dong r1 &= 0xFF00FF; 83429a84457aed4c45bc900998b5e11c03023264208James Dong r1 += r2; 83529a84457aed4c45bc900998b5e11c03023264208James Dong 83629a84457aed4c45bc900998b5e11c03023264208James Dong r7 += r8; 83729a84457aed4c45bc900998b5e11c03023264208James Dong 83829a84457aed4c45bc900998b5e11c03023264208James Dong r0 += 20 * r1; 83929a84457aed4c45bc900998b5e11c03023264208James Dong r6 += 20 * r7; 84029a84457aed4c45bc900998b5e11c03023264208James Dong r0 += 0x100010; 84129a84457aed4c45bc900998b5e11c03023264208James Dong r6 += 0x100010; 84229a84457aed4c45bc900998b5e11c03023264208James Dong 84329a84457aed4c45bc900998b5e11c03023264208James Dong r2 = *((uint32*)(p_ref - (inpitch << 1))); /* r2, r8, ref[-1] */ 84429a84457aed4c45bc900998b5e11c03023264208James Dong r8 = (r2 >> 8) & 0xFF00FF; 84529a84457aed4c45bc900998b5e11c03023264208James Dong r2 &= 0xFF00FF; 84629a84457aed4c45bc900998b5e11c03023264208James Dong 
84729a84457aed4c45bc900998b5e11c03023264208James Dong r1 = *((uint32*)(p_ref + inpitch)); /* r1, r7, ref[2] */ 84829a84457aed4c45bc900998b5e11c03023264208James Dong r7 = (r1 >> 8) & 0xFF00FF; 84929a84457aed4c45bc900998b5e11c03023264208James Dong r1 &= 0xFF00FF; 85029a84457aed4c45bc900998b5e11c03023264208James Dong r1 += r2; 85129a84457aed4c45bc900998b5e11c03023264208James Dong 85229a84457aed4c45bc900998b5e11c03023264208James Dong r7 += r8; 85329a84457aed4c45bc900998b5e11c03023264208James Dong 85429a84457aed4c45bc900998b5e11c03023264208James Dong r0 -= 5 * r1; 85529a84457aed4c45bc900998b5e11c03023264208James Dong r6 -= 5 * r7; 85629a84457aed4c45bc900998b5e11c03023264208James Dong 85729a84457aed4c45bc900998b5e11c03023264208James Dong r0 >>= 5; 85829a84457aed4c45bc900998b5e11c03023264208James Dong r6 >>= 5; 85929a84457aed4c45bc900998b5e11c03023264208James Dong /* clip */ 86029a84457aed4c45bc900998b5e11c03023264208James Dong r13 |= r6; 86129a84457aed4c45bc900998b5e11c03023264208James Dong r13 |= r0; 86229a84457aed4c45bc900998b5e11c03023264208James Dong //CLIPPACK(r6,result) 86329a84457aed4c45bc900998b5e11c03023264208James Dong 86429a84457aed4c45bc900998b5e11c03023264208James Dong r1 = *((uint32*)(p_ref + dy)); 86529a84457aed4c45bc900998b5e11c03023264208James Dong r2 = (r1 >> 8) & 0xFF00FF; 86629a84457aed4c45bc900998b5e11c03023264208James Dong r1 &= 0xFF00FF; 86729a84457aed4c45bc900998b5e11c03023264208James Dong r0 += r1; 86829a84457aed4c45bc900998b5e11c03023264208James Dong r6 += r2; 86929a84457aed4c45bc900998b5e11c03023264208James Dong r0 += 0x10001; 87029a84457aed4c45bc900998b5e11c03023264208James Dong r6 += 0x10001; 87129a84457aed4c45bc900998b5e11c03023264208James Dong r0 = (r0 >> 1) & 0xFF00FF; 87229a84457aed4c45bc900998b5e11c03023264208James Dong r6 = (r6 >> 1) & 0xFF00FF; 87329a84457aed4c45bc900998b5e11c03023264208James Dong 87429a84457aed4c45bc900998b5e11c03023264208James Dong r0 |= (r6 << 8); /* pack it back */ 87529a84457aed4c45bc900998b5e11c03023264208James 
Dong *((uint32*)(p_cur += outpitch)) = r0; 87629a84457aed4c45bc900998b5e11c03023264208James Dong } 87729a84457aed4c45bc900998b5e11c03023264208James Dong p_cur += curr_offset; /* offset to the next pixel */ 87829a84457aed4c45bc900998b5e11c03023264208James Dong if (r13 & 0xFF000700) /* this column need clipping */ 87929a84457aed4c45bc900998b5e11c03023264208James Dong { 88029a84457aed4c45bc900998b5e11c03023264208James Dong p_cur -= 4; 88129a84457aed4c45bc900998b5e11c03023264208James Dong for (i = 0; i < 4; i++) 88229a84457aed4c45bc900998b5e11c03023264208James Dong { 88329a84457aed4c45bc900998b5e11c03023264208James Dong p_ref = in + i; 88429a84457aed4c45bc900998b5e11c03023264208James Dong p_cur -= outpitch; /* compensate for the first offset */ 88529a84457aed4c45bc900998b5e11c03023264208James Dong 8864e1d7b8d16abbe8a60fa3957646297b552e82fb0Martin Storsjo tmp = p_ref + ref_offset; /* limit */ 8874e1d7b8d16abbe8a60fa3957646297b552e82fb0Martin Storsjo while (p_ref < tmp) 88829a84457aed4c45bc900998b5e11c03023264208James Dong { /* loop un-rolled */ 88929a84457aed4c45bc900998b5e11c03023264208James Dong r0 = *(p_ref - (inpitch << 1)); 89029a84457aed4c45bc900998b5e11c03023264208James Dong r1 = *(p_ref - inpitch); 89129a84457aed4c45bc900998b5e11c03023264208James Dong r2 = *p_ref; 89229a84457aed4c45bc900998b5e11c03023264208James Dong r3 = *(p_ref += inpitch); /* modify pointer before loading */ 89329a84457aed4c45bc900998b5e11c03023264208James Dong r4 = *(p_ref += inpitch); 89429a84457aed4c45bc900998b5e11c03023264208James Dong /* first pixel */ 89529a84457aed4c45bc900998b5e11c03023264208James Dong r5 = *(p_ref += inpitch); 89629a84457aed4c45bc900998b5e11c03023264208James Dong result = (r0 + r5); 89729a84457aed4c45bc900998b5e11c03023264208James Dong r0 = (r1 + r4); 89829a84457aed4c45bc900998b5e11c03023264208James Dong result -= (r0 * 5);//result -= r0; result -= (r0<<2); 89929a84457aed4c45bc900998b5e11c03023264208James Dong r0 = (r2 + r3); 
90029a84457aed4c45bc900998b5e11c03023264208James Dong result += (r0 * 20);//result += (r0<<4); result += (r0<<2); 90129a84457aed4c45bc900998b5e11c03023264208James Dong result = (result + 16) >> 5; 90229a84457aed4c45bc900998b5e11c03023264208James Dong CLIP_RESULT(result) 90329a84457aed4c45bc900998b5e11c03023264208James Dong /* 3/4 pel, no need to clip */ 90429a84457aed4c45bc900998b5e11c03023264208James Dong result = (result + p_ref[dy-(inpitch<<1)] + 1); 90529a84457aed4c45bc900998b5e11c03023264208James Dong result = (result >> 1); 90629a84457aed4c45bc900998b5e11c03023264208James Dong *(p_cur += outpitch) = result; 90729a84457aed4c45bc900998b5e11c03023264208James Dong /* second pixel */ 90829a84457aed4c45bc900998b5e11c03023264208James Dong r0 = *(p_ref += inpitch); 90929a84457aed4c45bc900998b5e11c03023264208James Dong result = (r1 + r0); 91029a84457aed4c45bc900998b5e11c03023264208James Dong r1 = (r2 + r5); 91129a84457aed4c45bc900998b5e11c03023264208James Dong result -= (r1 * 5);//result -= r1; result -= (r1<<2); 91229a84457aed4c45bc900998b5e11c03023264208James Dong r1 = (r3 + r4); 91329a84457aed4c45bc900998b5e11c03023264208James Dong result += (r1 * 20);//result += (r1<<4); result += (r1<<2); 91429a84457aed4c45bc900998b5e11c03023264208James Dong result = (result + 16) >> 5; 91529a84457aed4c45bc900998b5e11c03023264208James Dong CLIP_RESULT(result) 91629a84457aed4c45bc900998b5e11c03023264208James Dong /* 3/4 pel, no need to clip */ 91729a84457aed4c45bc900998b5e11c03023264208James Dong result = (result + p_ref[dy-(inpitch<<1)] + 1); 91829a84457aed4c45bc900998b5e11c03023264208James Dong result = (result >> 1); 91929a84457aed4c45bc900998b5e11c03023264208James Dong *(p_cur += outpitch) = result; 92029a84457aed4c45bc900998b5e11c03023264208James Dong /* third pixel */ 92129a84457aed4c45bc900998b5e11c03023264208James Dong r1 = *(p_ref += inpitch); 92229a84457aed4c45bc900998b5e11c03023264208James Dong result = (r2 + r1); 92329a84457aed4c45bc900998b5e11c03023264208James Dong r2 
= (r3 + r0); 92429a84457aed4c45bc900998b5e11c03023264208James Dong result -= (r2 * 5);//result -= r2; result -= (r2<<2); 92529a84457aed4c45bc900998b5e11c03023264208James Dong r2 = (r4 + r5); 92629a84457aed4c45bc900998b5e11c03023264208James Dong result += (r2 * 20);//result += (r2<<4); result += (r2<<2); 92729a84457aed4c45bc900998b5e11c03023264208James Dong result = (result + 16) >> 5; 92829a84457aed4c45bc900998b5e11c03023264208James Dong CLIP_RESULT(result) 92929a84457aed4c45bc900998b5e11c03023264208James Dong /* 3/4 pel, no need to clip */ 93029a84457aed4c45bc900998b5e11c03023264208James Dong result = (result + p_ref[dy-(inpitch<<1)] + 1); 93129a84457aed4c45bc900998b5e11c03023264208James Dong result = (result >> 1); 93229a84457aed4c45bc900998b5e11c03023264208James Dong *(p_cur += outpitch) = result; 93329a84457aed4c45bc900998b5e11c03023264208James Dong /* fourth pixel */ 93429a84457aed4c45bc900998b5e11c03023264208James Dong r2 = *(p_ref += inpitch); 93529a84457aed4c45bc900998b5e11c03023264208James Dong result = (r3 + r2); 93629a84457aed4c45bc900998b5e11c03023264208James Dong r3 = (r4 + r1); 93729a84457aed4c45bc900998b5e11c03023264208James Dong result -= (r3 * 5);//result -= r3; result -= (r3<<2); 93829a84457aed4c45bc900998b5e11c03023264208James Dong r3 = (r5 + r0); 93929a84457aed4c45bc900998b5e11c03023264208James Dong result += (r3 * 20);//result += (r3<<4); result += (r3<<2); 94029a84457aed4c45bc900998b5e11c03023264208James Dong result = (result + 16) >> 5; 94129a84457aed4c45bc900998b5e11c03023264208James Dong CLIP_RESULT(result) 94229a84457aed4c45bc900998b5e11c03023264208James Dong /* 3/4 pel, no need to clip */ 94329a84457aed4c45bc900998b5e11c03023264208James Dong result = (result + p_ref[dy-(inpitch<<1)] + 1); 94429a84457aed4c45bc900998b5e11c03023264208James Dong result = (result >> 1); 94529a84457aed4c45bc900998b5e11c03023264208James Dong *(p_cur += outpitch) = result; 94629a84457aed4c45bc900998b5e11c03023264208James Dong p_ref -= (inpitch << 1); /* move back 
to center of the filter of the next one */ 94729a84457aed4c45bc900998b5e11c03023264208James Dong } 94829a84457aed4c45bc900998b5e11c03023264208James Dong p_cur += (curr_offset - 3); 94929a84457aed4c45bc900998b5e11c03023264208James Dong } 95029a84457aed4c45bc900998b5e11c03023264208James Dong } 95129a84457aed4c45bc900998b5e11c03023264208James Dong } 95229a84457aed4c45bc900998b5e11c03023264208James Dong } 95329a84457aed4c45bc900998b5e11c03023264208James Dong else 95429a84457aed4c45bc900998b5e11c03023264208James Dong { 95529a84457aed4c45bc900998b5e11c03023264208James Dong for (j = 0; j < blkwidth; j += 4, in += 4) 95629a84457aed4c45bc900998b5e11c03023264208James Dong { 95729a84457aed4c45bc900998b5e11c03023264208James Dong r13 = 0; 95829a84457aed4c45bc900998b5e11c03023264208James Dong p_ref = in; 95929a84457aed4c45bc900998b5e11c03023264208James Dong p_cur -= outpitch; /* compensate for the first offset */ 9604e1d7b8d16abbe8a60fa3957646297b552e82fb0Martin Storsjo tmp = p_ref + ref_offset; /* limit */ 9614e1d7b8d16abbe8a60fa3957646297b552e82fb0Martin Storsjo while (p_ref < tmp) /* the loop un-rolled */ 96229a84457aed4c45bc900998b5e11c03023264208James Dong { 96329a84457aed4c45bc900998b5e11c03023264208James Dong r0 = *((uint32*)(p_ref - (inpitch << 1))); /* load 4 bytes */ 96429a84457aed4c45bc900998b5e11c03023264208James Dong p_ref += inpitch; 96529a84457aed4c45bc900998b5e11c03023264208James Dong r6 = (r0 >> 8) & 0xFF00FF; /* second and fourth byte */ 96629a84457aed4c45bc900998b5e11c03023264208James Dong r0 &= 0xFF00FF; 96729a84457aed4c45bc900998b5e11c03023264208James Dong 96829a84457aed4c45bc900998b5e11c03023264208James Dong r1 = *((uint32*)(p_ref + (inpitch << 1))); /* r1, r7, ref[3] */ 96929a84457aed4c45bc900998b5e11c03023264208James Dong r7 = (r1 >> 8) & 0xFF00FF; 97029a84457aed4c45bc900998b5e11c03023264208James Dong r1 &= 0xFF00FF; 97129a84457aed4c45bc900998b5e11c03023264208James Dong 97229a84457aed4c45bc900998b5e11c03023264208James Dong r0 += r1; 
97329a84457aed4c45bc900998b5e11c03023264208James Dong r6 += r7; 97429a84457aed4c45bc900998b5e11c03023264208James Dong 97529a84457aed4c45bc900998b5e11c03023264208James Dong r2 = *((uint32*)p_ref); /* r2, r8, ref[1] */ 97629a84457aed4c45bc900998b5e11c03023264208James Dong r8 = (r2 >> 8) & 0xFF00FF; 97729a84457aed4c45bc900998b5e11c03023264208James Dong r2 &= 0xFF00FF; 97829a84457aed4c45bc900998b5e11c03023264208James Dong 97929a84457aed4c45bc900998b5e11c03023264208James Dong r1 = *((uint32*)(p_ref - inpitch)); /* r1, r7, ref[0] */ 98029a84457aed4c45bc900998b5e11c03023264208James Dong r7 = (r1 >> 8) & 0xFF00FF; 98129a84457aed4c45bc900998b5e11c03023264208James Dong r1 &= 0xFF00FF; 98229a84457aed4c45bc900998b5e11c03023264208James Dong r1 += r2; 98329a84457aed4c45bc900998b5e11c03023264208James Dong 98429a84457aed4c45bc900998b5e11c03023264208James Dong r7 += r8; 98529a84457aed4c45bc900998b5e11c03023264208James Dong 98629a84457aed4c45bc900998b5e11c03023264208James Dong r0 += 20 * r1; 98729a84457aed4c45bc900998b5e11c03023264208James Dong r6 += 20 * r7; 98829a84457aed4c45bc900998b5e11c03023264208James Dong r0 += 0x100010; 98929a84457aed4c45bc900998b5e11c03023264208James Dong r6 += 0x100010; 99029a84457aed4c45bc900998b5e11c03023264208James Dong 99129a84457aed4c45bc900998b5e11c03023264208James Dong r2 = *((uint32*)(p_ref - (inpitch << 1))); /* r2, r8, ref[-1] */ 99229a84457aed4c45bc900998b5e11c03023264208James Dong r8 = (r2 >> 8) & 0xFF00FF; 99329a84457aed4c45bc900998b5e11c03023264208James Dong r2 &= 0xFF00FF; 99429a84457aed4c45bc900998b5e11c03023264208James Dong 99529a84457aed4c45bc900998b5e11c03023264208James Dong r1 = *((uint32*)(p_ref + inpitch)); /* r1, r7, ref[2] */ 99629a84457aed4c45bc900998b5e11c03023264208James Dong r7 = (r1 >> 8) & 0xFF00FF; 99729a84457aed4c45bc900998b5e11c03023264208James Dong r1 &= 0xFF00FF; 99829a84457aed4c45bc900998b5e11c03023264208James Dong r1 += r2; 99929a84457aed4c45bc900998b5e11c03023264208James Dong 
100029a84457aed4c45bc900998b5e11c03023264208James Dong r7 += r8; 100129a84457aed4c45bc900998b5e11c03023264208James Dong 100229a84457aed4c45bc900998b5e11c03023264208James Dong r0 -= 5 * r1; 100329a84457aed4c45bc900998b5e11c03023264208James Dong r6 -= 5 * r7; 100429a84457aed4c45bc900998b5e11c03023264208James Dong 100529a84457aed4c45bc900998b5e11c03023264208James Dong r0 >>= 5; 100629a84457aed4c45bc900998b5e11c03023264208James Dong r6 >>= 5; 100729a84457aed4c45bc900998b5e11c03023264208James Dong /* clip */ 100829a84457aed4c45bc900998b5e11c03023264208James Dong r13 |= r6; 100929a84457aed4c45bc900998b5e11c03023264208James Dong r13 |= r0; 101029a84457aed4c45bc900998b5e11c03023264208James Dong //CLIPPACK(r6,result) 101129a84457aed4c45bc900998b5e11c03023264208James Dong r0 &= 0xFF00FF; 101229a84457aed4c45bc900998b5e11c03023264208James Dong r6 &= 0xFF00FF; 101329a84457aed4c45bc900998b5e11c03023264208James Dong r0 |= (r6 << 8); /* pack it back */ 101429a84457aed4c45bc900998b5e11c03023264208James Dong *((uint32*)(p_cur += outpitch)) = r0; 101529a84457aed4c45bc900998b5e11c03023264208James Dong } 101629a84457aed4c45bc900998b5e11c03023264208James Dong p_cur += curr_offset; /* offset to the next pixel */ 101729a84457aed4c45bc900998b5e11c03023264208James Dong if (r13 & 0xFF000700) /* this column need clipping */ 101829a84457aed4c45bc900998b5e11c03023264208James Dong { 101929a84457aed4c45bc900998b5e11c03023264208James Dong p_cur -= 4; 102029a84457aed4c45bc900998b5e11c03023264208James Dong for (i = 0; i < 4; i++) 102129a84457aed4c45bc900998b5e11c03023264208James Dong { 102229a84457aed4c45bc900998b5e11c03023264208James Dong p_ref = in + i; 102329a84457aed4c45bc900998b5e11c03023264208James Dong p_cur -= outpitch; /* compensate for the first offset */ 10244e1d7b8d16abbe8a60fa3957646297b552e82fb0Martin Storsjo tmp = p_ref + ref_offset; /* limit */ 10254e1d7b8d16abbe8a60fa3957646297b552e82fb0Martin Storsjo while (p_ref < tmp) 102629a84457aed4c45bc900998b5e11c03023264208James Dong { /* 
loop un-rolled */ 102729a84457aed4c45bc900998b5e11c03023264208James Dong r0 = *(p_ref - (inpitch << 1)); 102829a84457aed4c45bc900998b5e11c03023264208James Dong r1 = *(p_ref - inpitch); 102929a84457aed4c45bc900998b5e11c03023264208James Dong r2 = *p_ref; 103029a84457aed4c45bc900998b5e11c03023264208James Dong r3 = *(p_ref += inpitch); /* modify pointer before loading */ 103129a84457aed4c45bc900998b5e11c03023264208James Dong r4 = *(p_ref += inpitch); 103229a84457aed4c45bc900998b5e11c03023264208James Dong /* first pixel */ 103329a84457aed4c45bc900998b5e11c03023264208James Dong r5 = *(p_ref += inpitch); 103429a84457aed4c45bc900998b5e11c03023264208James Dong result = (r0 + r5); 103529a84457aed4c45bc900998b5e11c03023264208James Dong r0 = (r1 + r4); 103629a84457aed4c45bc900998b5e11c03023264208James Dong result -= (r0 * 5);//result -= r0; result -= (r0<<2); 103729a84457aed4c45bc900998b5e11c03023264208James Dong r0 = (r2 + r3); 103829a84457aed4c45bc900998b5e11c03023264208James Dong result += (r0 * 20);//result += (r0<<4); result += (r0<<2); 103929a84457aed4c45bc900998b5e11c03023264208James Dong result = (result + 16) >> 5; 104029a84457aed4c45bc900998b5e11c03023264208James Dong CLIP_RESULT(result) 104129a84457aed4c45bc900998b5e11c03023264208James Dong *(p_cur += outpitch) = result; 104229a84457aed4c45bc900998b5e11c03023264208James Dong /* second pixel */ 104329a84457aed4c45bc900998b5e11c03023264208James Dong r0 = *(p_ref += inpitch); 104429a84457aed4c45bc900998b5e11c03023264208James Dong result = (r1 + r0); 104529a84457aed4c45bc900998b5e11c03023264208James Dong r1 = (r2 + r5); 104629a84457aed4c45bc900998b5e11c03023264208James Dong result -= (r1 * 5);//result -= r1; result -= (r1<<2); 104729a84457aed4c45bc900998b5e11c03023264208James Dong r1 = (r3 + r4); 104829a84457aed4c45bc900998b5e11c03023264208James Dong result += (r1 * 20);//result += (r1<<4); result += (r1<<2); 104929a84457aed4c45bc900998b5e11c03023264208James Dong result = (result + 16) >> 5; 
105029a84457aed4c45bc900998b5e11c03023264208James Dong CLIP_RESULT(result) 105129a84457aed4c45bc900998b5e11c03023264208James Dong *(p_cur += outpitch) = result; 105229a84457aed4c45bc900998b5e11c03023264208James Dong /* third pixel */ 105329a84457aed4c45bc900998b5e11c03023264208James Dong r1 = *(p_ref += inpitch); 105429a84457aed4c45bc900998b5e11c03023264208James Dong result = (r2 + r1); 105529a84457aed4c45bc900998b5e11c03023264208James Dong r2 = (r3 + r0); 105629a84457aed4c45bc900998b5e11c03023264208James Dong result -= (r2 * 5);//result -= r2; result -= (r2<<2); 105729a84457aed4c45bc900998b5e11c03023264208James Dong r2 = (r4 + r5); 105829a84457aed4c45bc900998b5e11c03023264208James Dong result += (r2 * 20);//result += (r2<<4); result += (r2<<2); 105929a84457aed4c45bc900998b5e11c03023264208James Dong result = (result + 16) >> 5; 106029a84457aed4c45bc900998b5e11c03023264208James Dong CLIP_RESULT(result) 106129a84457aed4c45bc900998b5e11c03023264208James Dong *(p_cur += outpitch) = result; 106229a84457aed4c45bc900998b5e11c03023264208James Dong /* fourth pixel */ 106329a84457aed4c45bc900998b5e11c03023264208James Dong r2 = *(p_ref += inpitch); 106429a84457aed4c45bc900998b5e11c03023264208James Dong result = (r3 + r2); 106529a84457aed4c45bc900998b5e11c03023264208James Dong r3 = (r4 + r1); 106629a84457aed4c45bc900998b5e11c03023264208James Dong result -= (r3 * 5);//result -= r3; result -= (r3<<2); 106729a84457aed4c45bc900998b5e11c03023264208James Dong r3 = (r5 + r0); 106829a84457aed4c45bc900998b5e11c03023264208James Dong result += (r3 * 20);//result += (r3<<4); result += (r3<<2); 106929a84457aed4c45bc900998b5e11c03023264208James Dong result = (result + 16) >> 5; 107029a84457aed4c45bc900998b5e11c03023264208James Dong CLIP_RESULT(result) 107129a84457aed4c45bc900998b5e11c03023264208James Dong *(p_cur += outpitch) = result; 107229a84457aed4c45bc900998b5e11c03023264208James Dong p_ref -= (inpitch << 1); /* move back to center of the filter of the next one */ 
/*
 * eVertInterp2MC
 *
 * Applies the vertical six-tap filter (1, -5, 20, 20, -5, 1) to a block of
 * 8-bit samples and stores the raw filter sums as ints.
 *
 * For every column x in [0, blkwidth) and row y in [0, blkheight):
 *
 *   out[y*outpitch + x] =        in[(y-2)*inpitch + x] + in[(y+3)*inpitch + x]
 *                         -  5 * (in[(y-1)*inpitch + x] + in[(y+2)*inpitch + x])
 *                         + 20 * (in[ y   *inpitch + x] + in[(y+1)*inpitch + x]);
 *
 * No rounding, shifting or clipping is done here; normalisation is left to
 * whoever consumes the int buffer.  With 8-bit input every sum lies in
 * [-2550, 10710].
 *
 * Parameters:
 *   in        - sample at row 0, column 0 of the block.  The two rows above
 *               and the three rows below the block must be readable.
 *   inpitch   - distance, in samples, between vertically adjacent inputs.
 *   out       - destination for the filter sums (row 0, column 0).
 *   outpitch  - distance, in ints, between vertically adjacent outputs.
 *   blkwidth  - number of columns to produce.
 *   blkheight - number of rows to produce.
 *
 * Differences from the previous four-times-unrolled version:
 *   - Any positive blkheight is handled.  The unrolled loop always produced
 *     rows in groups of four, so a height that was not a multiple of four
 *     wrote past the requested rows and started later columns on the wrong
 *     row.
 *   - Output is addressed by index from out + column, so no pointer is ever
 *     formed in front of, or beyond, the rows actually written.
 *   - Results for heights that are a multiple of four are unchanged.
 */
void eVertInterp2MC(uint8 *in, int inpitch, int *out, int outpitch,
                    int blkwidth, int blkheight)
{
    int col, row;

    /* The original limit-pointer loop did no work for a non-positive height
     * or pitch, and the column loop none for a non-positive width; keep
     * those calls as no-ops. */
    if (blkwidth <= 0 || blkheight <= 0 || inpitch <= 0)
    {
        return;
    }

    for (col = 0; col < blkwidth; col++)
    {
        const uint8 *p_ref = in + col;  /* row 0 of this input column */
        int *p_cur = out + col;         /* row 0 of this output column */

        /* Sliding window over the taps: rows y-2 .. y+2 for y = 0. */
        int tap0 = p_ref[-2 * inpitch];
        int tap1 = p_ref[-inpitch];
        int tap2 = p_ref[0];
        int tap3 = p_ref[inpitch];
        int tap4 = p_ref[2 * inpitch];

        for (row = 0; row < blkheight; row++)
        {
            /* Only the bottom tap (row y+3) has to be fetched per output. */
            int tap5 = p_ref[(row + 3) * inpitch];

            p_cur[row * outpitch] = (tap0 + tap5)
                                    - 5 * (tap1 + tap4)
                                    + 20 * (tap2 + tap3);

            /* Slide the window down by one row. */
            tap0 = tap1;
            tap1 = tap2;
            tap2 = tap3;
            tap3 = tap4;
            tap4 = tap5;
        }
    }
}
114729a84457aed4c45bc900998b5e11c03023264208James Dong 114829a84457aed4c45bc900998b5e11c03023264208James Dongvoid eVertInterp3MC(int *in, int inpitch, uint8 *out, int outpitch, 114929a84457aed4c45bc900998b5e11c03023264208James Dong int blkwidth, int blkheight, int dy) 115029a84457aed4c45bc900998b5e11c03023264208James Dong{ 115129a84457aed4c45bc900998b5e11c03023264208James Dong uint8 *p_cur; 11524e1d7b8d16abbe8a60fa3957646297b552e82fb0Martin Storsjo int *p_ref, *tmp; 115329a84457aed4c45bc900998b5e11c03023264208James Dong int result, result2, curr_offset, ref_offset; 115429a84457aed4c45bc900998b5e11c03023264208James Dong int j, r0, r1, r2, r3, r4, r5; 115529a84457aed4c45bc900998b5e11c03023264208James Dong 115629a84457aed4c45bc900998b5e11c03023264208James Dong p_cur = out; 115729a84457aed4c45bc900998b5e11c03023264208James Dong curr_offset = 1 - outpitch * (blkheight - 1); /* offset vertically back up and one pixel to right */ 115829a84457aed4c45bc900998b5e11c03023264208James Dong ref_offset = blkheight * inpitch; /* for limit */ 115929a84457aed4c45bc900998b5e11c03023264208James Dong 116029a84457aed4c45bc900998b5e11c03023264208James Dong if (dy&1) 116129a84457aed4c45bc900998b5e11c03023264208James Dong { 116229a84457aed4c45bc900998b5e11c03023264208James Dong dy = (dy >> 1) ? 
-(inpitch << 1) : -(inpitch << 1) - inpitch; 116329a84457aed4c45bc900998b5e11c03023264208James Dong 116429a84457aed4c45bc900998b5e11c03023264208James Dong for (j = 0; j < blkwidth; j++) 116529a84457aed4c45bc900998b5e11c03023264208James Dong { 116629a84457aed4c45bc900998b5e11c03023264208James Dong p_cur -= outpitch; /* compensate for the first offset */ 116729a84457aed4c45bc900998b5e11c03023264208James Dong p_ref = in++; 116829a84457aed4c45bc900998b5e11c03023264208James Dong 11694e1d7b8d16abbe8a60fa3957646297b552e82fb0Martin Storsjo tmp = p_ref + ref_offset; /* limit */ 11704e1d7b8d16abbe8a60fa3957646297b552e82fb0Martin Storsjo while (p_ref < tmp) 117129a84457aed4c45bc900998b5e11c03023264208James Dong { /* loop un-rolled */ 117229a84457aed4c45bc900998b5e11c03023264208James Dong r0 = *(p_ref - (inpitch << 1)); 117329a84457aed4c45bc900998b5e11c03023264208James Dong r1 = *(p_ref - inpitch); 117429a84457aed4c45bc900998b5e11c03023264208James Dong r2 = *p_ref; 117529a84457aed4c45bc900998b5e11c03023264208James Dong r3 = *(p_ref += inpitch); /* modify pointer before loading */ 117629a84457aed4c45bc900998b5e11c03023264208James Dong r4 = *(p_ref += inpitch); 117729a84457aed4c45bc900998b5e11c03023264208James Dong /* first pixel */ 117829a84457aed4c45bc900998b5e11c03023264208James Dong r5 = *(p_ref += inpitch); 117929a84457aed4c45bc900998b5e11c03023264208James Dong result = (r0 + r5); 118029a84457aed4c45bc900998b5e11c03023264208James Dong r0 = (r1 + r4); 118129a84457aed4c45bc900998b5e11c03023264208James Dong result -= (r0 * 5);//result -= r0; result -= (r0<<2); 118229a84457aed4c45bc900998b5e11c03023264208James Dong r0 = (r2 + r3); 118329a84457aed4c45bc900998b5e11c03023264208James Dong result += (r0 * 20);//result += (r0<<4); result += (r0<<2); 118429a84457aed4c45bc900998b5e11c03023264208James Dong result = (result + 512) >> 10; 118529a84457aed4c45bc900998b5e11c03023264208James Dong CLIP_RESULT(result) 118629a84457aed4c45bc900998b5e11c03023264208James Dong result2 = ((p_ref[dy] 
+ 16) >> 5); 118729a84457aed4c45bc900998b5e11c03023264208James Dong CLIP_RESULT(result2) 118829a84457aed4c45bc900998b5e11c03023264208James Dong /* 3/4 pel, no need to clip */ 118929a84457aed4c45bc900998b5e11c03023264208James Dong result = (result + result2 + 1); 119029a84457aed4c45bc900998b5e11c03023264208James Dong result = (result >> 1); 119129a84457aed4c45bc900998b5e11c03023264208James Dong *(p_cur += outpitch) = result; 119229a84457aed4c45bc900998b5e11c03023264208James Dong /* second pixel */ 119329a84457aed4c45bc900998b5e11c03023264208James Dong r0 = *(p_ref += inpitch); 119429a84457aed4c45bc900998b5e11c03023264208James Dong result = (r1 + r0); 119529a84457aed4c45bc900998b5e11c03023264208James Dong r1 = (r2 + r5); 119629a84457aed4c45bc900998b5e11c03023264208James Dong result -= (r1 * 5);//result -= r1; result -= (r1<<2); 119729a84457aed4c45bc900998b5e11c03023264208James Dong r1 = (r3 + r4); 119829a84457aed4c45bc900998b5e11c03023264208James Dong result += (r1 * 20);//result += (r1<<4); result += (r1<<2); 119929a84457aed4c45bc900998b5e11c03023264208James Dong result = (result + 512) >> 10; 120029a84457aed4c45bc900998b5e11c03023264208James Dong CLIP_RESULT(result) 120129a84457aed4c45bc900998b5e11c03023264208James Dong result2 = ((p_ref[dy] + 16) >> 5); 120229a84457aed4c45bc900998b5e11c03023264208James Dong CLIP_RESULT(result2) 120329a84457aed4c45bc900998b5e11c03023264208James Dong /* 3/4 pel, no need to clip */ 120429a84457aed4c45bc900998b5e11c03023264208James Dong result = (result + result2 + 1); 120529a84457aed4c45bc900998b5e11c03023264208James Dong result = (result >> 1); 120629a84457aed4c45bc900998b5e11c03023264208James Dong *(p_cur += outpitch) = result; 120729a84457aed4c45bc900998b5e11c03023264208James Dong /* third pixel */ 120829a84457aed4c45bc900998b5e11c03023264208James Dong r1 = *(p_ref += inpitch); 120929a84457aed4c45bc900998b5e11c03023264208James Dong result = (r2 + r1); 121029a84457aed4c45bc900998b5e11c03023264208James Dong r2 = (r3 + r0); 
121129a84457aed4c45bc900998b5e11c03023264208James Dong result -= (r2 * 5);//result -= r2; result -= (r2<<2); 121229a84457aed4c45bc900998b5e11c03023264208James Dong r2 = (r4 + r5); 121329a84457aed4c45bc900998b5e11c03023264208James Dong result += (r2 * 20);//result += (r2<<4); result += (r2<<2); 121429a84457aed4c45bc900998b5e11c03023264208James Dong result = (result + 512) >> 10; 121529a84457aed4c45bc900998b5e11c03023264208James Dong CLIP_RESULT(result) 121629a84457aed4c45bc900998b5e11c03023264208James Dong result2 = ((p_ref[dy] + 16) >> 5); 121729a84457aed4c45bc900998b5e11c03023264208James Dong CLIP_RESULT(result2) 121829a84457aed4c45bc900998b5e11c03023264208James Dong /* 3/4 pel, no need to clip */ 121929a84457aed4c45bc900998b5e11c03023264208James Dong result = (result + result2 + 1); 122029a84457aed4c45bc900998b5e11c03023264208James Dong result = (result >> 1); 122129a84457aed4c45bc900998b5e11c03023264208James Dong *(p_cur += outpitch) = result; 122229a84457aed4c45bc900998b5e11c03023264208James Dong /* fourth pixel */ 122329a84457aed4c45bc900998b5e11c03023264208James Dong r2 = *(p_ref += inpitch); 122429a84457aed4c45bc900998b5e11c03023264208James Dong result = (r3 + r2); 122529a84457aed4c45bc900998b5e11c03023264208James Dong r3 = (r4 + r1); 122629a84457aed4c45bc900998b5e11c03023264208James Dong result -= (r3 * 5);//result -= r3; result -= (r3<<2); 122729a84457aed4c45bc900998b5e11c03023264208James Dong r3 = (r5 + r0); 122829a84457aed4c45bc900998b5e11c03023264208James Dong result += (r3 * 20);//result += (r3<<4); result += (r3<<2); 122929a84457aed4c45bc900998b5e11c03023264208James Dong result = (result + 512) >> 10; 123029a84457aed4c45bc900998b5e11c03023264208James Dong CLIP_RESULT(result) 123129a84457aed4c45bc900998b5e11c03023264208James Dong result2 = ((p_ref[dy] + 16) >> 5); 123229a84457aed4c45bc900998b5e11c03023264208James Dong CLIP_RESULT(result2) 123329a84457aed4c45bc900998b5e11c03023264208James Dong /* 3/4 pel, no need to clip */ 
123429a84457aed4c45bc900998b5e11c03023264208James Dong result = (result + result2 + 1); 123529a84457aed4c45bc900998b5e11c03023264208James Dong result = (result >> 1); 123629a84457aed4c45bc900998b5e11c03023264208James Dong *(p_cur += outpitch) = result; 123729a84457aed4c45bc900998b5e11c03023264208James Dong p_ref -= (inpitch << 1); /* move back to center of the filter of the next one */ 123829a84457aed4c45bc900998b5e11c03023264208James Dong } 123929a84457aed4c45bc900998b5e11c03023264208James Dong p_cur += curr_offset; 124029a84457aed4c45bc900998b5e11c03023264208James Dong } 124129a84457aed4c45bc900998b5e11c03023264208James Dong } 124229a84457aed4c45bc900998b5e11c03023264208James Dong else 124329a84457aed4c45bc900998b5e11c03023264208James Dong { 124429a84457aed4c45bc900998b5e11c03023264208James Dong for (j = 0; j < blkwidth; j++) 124529a84457aed4c45bc900998b5e11c03023264208James Dong { 124629a84457aed4c45bc900998b5e11c03023264208James Dong p_cur -= outpitch; /* compensate for the first offset */ 124729a84457aed4c45bc900998b5e11c03023264208James Dong p_ref = in++; 124829a84457aed4c45bc900998b5e11c03023264208James Dong 12494e1d7b8d16abbe8a60fa3957646297b552e82fb0Martin Storsjo tmp = p_ref + ref_offset; /* limit */ 12504e1d7b8d16abbe8a60fa3957646297b552e82fb0Martin Storsjo while (p_ref < tmp) 125129a84457aed4c45bc900998b5e11c03023264208James Dong { /* loop un-rolled */ 125229a84457aed4c45bc900998b5e11c03023264208James Dong r0 = *(p_ref - (inpitch << 1)); 125329a84457aed4c45bc900998b5e11c03023264208James Dong r1 = *(p_ref - inpitch); 125429a84457aed4c45bc900998b5e11c03023264208James Dong r2 = *p_ref; 125529a84457aed4c45bc900998b5e11c03023264208James Dong r3 = *(p_ref += inpitch); /* modify pointer before loading */ 125629a84457aed4c45bc900998b5e11c03023264208James Dong r4 = *(p_ref += inpitch); 125729a84457aed4c45bc900998b5e11c03023264208James Dong /* first pixel */ 125829a84457aed4c45bc900998b5e11c03023264208James Dong r5 = *(p_ref += inpitch); 
125929a84457aed4c45bc900998b5e11c03023264208James Dong result = (r0 + r5); 126029a84457aed4c45bc900998b5e11c03023264208James Dong r0 = (r1 + r4); 126129a84457aed4c45bc900998b5e11c03023264208James Dong result -= (r0 * 5);//result -= r0; result -= (r0<<2); 126229a84457aed4c45bc900998b5e11c03023264208James Dong r0 = (r2 + r3); 126329a84457aed4c45bc900998b5e11c03023264208James Dong result += (r0 * 20);//result += (r0<<4); result += (r0<<2); 126429a84457aed4c45bc900998b5e11c03023264208James Dong result = (result + 512) >> 10; 126529a84457aed4c45bc900998b5e11c03023264208James Dong CLIP_RESULT(result) 126629a84457aed4c45bc900998b5e11c03023264208James Dong *(p_cur += outpitch) = result; 126729a84457aed4c45bc900998b5e11c03023264208James Dong /* second pixel */ 126829a84457aed4c45bc900998b5e11c03023264208James Dong r0 = *(p_ref += inpitch); 126929a84457aed4c45bc900998b5e11c03023264208James Dong result = (r1 + r0); 127029a84457aed4c45bc900998b5e11c03023264208James Dong r1 = (r2 + r5); 127129a84457aed4c45bc900998b5e11c03023264208James Dong result -= (r1 * 5);//result -= r1; result -= (r1<<2); 127229a84457aed4c45bc900998b5e11c03023264208James Dong r1 = (r3 + r4); 127329a84457aed4c45bc900998b5e11c03023264208James Dong result += (r1 * 20);//result += (r1<<4); result += (r1<<2); 127429a84457aed4c45bc900998b5e11c03023264208James Dong result = (result + 512) >> 10; 127529a84457aed4c45bc900998b5e11c03023264208James Dong CLIP_RESULT(result) 127629a84457aed4c45bc900998b5e11c03023264208James Dong *(p_cur += outpitch) = result; 127729a84457aed4c45bc900998b5e11c03023264208James Dong /* third pixel */ 127829a84457aed4c45bc900998b5e11c03023264208James Dong r1 = *(p_ref += inpitch); 127929a84457aed4c45bc900998b5e11c03023264208James Dong result = (r2 + r1); 128029a84457aed4c45bc900998b5e11c03023264208James Dong r2 = (r3 + r0); 128129a84457aed4c45bc900998b5e11c03023264208James Dong result -= (r2 * 5);//result -= r2; result -= (r2<<2); 128229a84457aed4c45bc900998b5e11c03023264208James Dong r2 = 
/*
 * eDiagonalInterpMC
 *
 * Produces a prediction block that is the rounded average of two half-pel
 * interpolations, each computed with the 6-tap filter (1, -5, 20, 20, -5, 1)
 * and rounding (sum + 16) >> 5:
 *
 *   1. horizontal pass: for every row starting at in1, taps in1[x-2 .. x+3];
 *      the 8-bit results are kept in the local scratch buffer tmp_res
 *      (row stride 24 bytes);
 *   2. vertical pass: for every column starting at in2, taps at rows
 *      -2 .. +3 around the output row; each result v is combined with the
 *      horizontal result h as (v + h + 1) >> 1 and written to out.
 *
 * Both passes first run a packed version that filters two pixels per 32-bit
 * register (one in bits 0..15, one in bits 16..31) without clipping.  All
 * packed results are OR-ed into r13; if any bit under the mask 0xFF000700 is
 * set, some result did not fit in 8 bits and the affected row (horizontal
 * pass) or group of four columns (vertical pass) is recomputed one pixel at
 * a time with CLIP_RESULT.
 *
 * Parameters:
 *   in1        first pixel of the block used for the horizontal pass
 *   in2        first pixel of the block used for the vertical pass
 *   inpitch    distance in bytes between successive rows of in1 / in2
 *   out        destination block
 *   outpitch   distance in bytes between successive rows of out
 *   blkwidth   block width in pixels
 *   blkheight  block height in pixels
 *
 * Constraints visible in the code:
 *   - pixels are produced four at a time, so blkwidth and blkheight are
 *     expected to be multiples of 4;
 *   - the scratch buffers are 24x24 bytes, so blkwidth must not exceed 24
 *     and blkheight + 5 rows must fit when the aligned copy is used;
 *   - the packed vertical pass stores 32-bit words to out, so out and
 *     outpitch are expected to be multiples of 4;
 *   - four pixels are packed into a word with the first pixel in the low
 *     byte and later read back by byte address, i.e. little-endian byte
 *     order is assumed.
 *
 * NOTE(review): the packed filters rely on '>>' of a negative int32 being an
 * arithmetic shift (implementation-defined in C) - confirm for new targets.
 */
void eDiagonalInterpMC(uint8 *in1, uint8 *in2, int inpitch,
                       uint8 *out, int outpitch,
                       int blkwidth, int blkheight)
{
    int j, i;
    int result;
    uint8 *p_cur, *p_ref, *p_tmp8, *tmp;
    int curr_offset, ref_offset;
    uint32 *p_tmp;
    uint32 pkres, tmp_result;
    int32 r0, r1, r2, r3, r4, r5;
    int32 r6, r7, r8, r9, r10, r13;
    /* The scratch buffers are accessed both as bytes and as 32-bit words, so
     * they are backed by uint32 storage to guarantee word alignment and are
     * viewed as 24x24 byte arrays through tmp_res / tmp_in. */
    uint32 tmp_res_w[(24 * 24) >> 2], tmp_in_w[(24 * 24) >> 2];
    uint8(*tmp_res)[24] = (uint8(*)[24]) tmp_res_w;
    uint8(*tmp_in)[24] = (uint8(*)[24]) tmp_in_w;

    /* ---------------- horizontal interpolation ---------------- */
    /* in1 is read one byte at a time, so it needs no particular alignment. */
    ref_offset = inpitch - blkwidth;
    p_ref = in1 - 2;            /* left-most tap of the first output pixel */

    p_tmp = tmp_res_w;
    for (j = blkheight; j > 0; j--)
    {
        r13 = 0;                /* collects all packed results of this row */
        tmp = p_ref + blkwidth; /* end of row */

        /* prime the filter with the first four pixels a,b,c,d */
        r0 = p_ref[0];
        r1 = p_ref[2];
        r0 |= (r1 << 16);           /* 0,c,0,a */
        r1 = p_ref[1];
        r2 = p_ref[3];
        r1 |= (r2 << 16);           /* 0,d,0,b */

        while (p_ref < tmp)
        {
            /* load the next four pixels e,f,g,h */
            r2 = *(p_ref += 4);
            r3 = p_ref[2];
            r2 |= (r3 << 16);           /* 0,g,0,e */
            r3 = p_ref[1];
            r4 = p_ref[3];
            r3 |= (r4 << 16);           /* 0,h,0,f */

            /* output pixels 0 and 2 */
            r4 = r0 + r3;       /* c+h, a+f */
            r5 = r0 + r1;       /* c+d, a+b */
            r6 = r2 + r3;       /* g+h, e+f */
            r5 >>= 16;
            /* shift in unsigned: the upper lane of r6 is discarded on purpose */
            r5 |= (int32)((uint32)r6 << 16);   /* e+f, c+d */
            r4 += r5 * 20;      /* c+20*e+20*f+h, a+20*c+20*d+f */
            r4 += 0x100010;     /* +16, +16 */
            r5 = r1 + r2;       /* d+g, b+e */
            r4 -= r5 * 5;       /* c-5*d+20*e+20*f-5*g+h, a-5*b+20*c+20*d-5*e+f */
            r4 >>= 5;
            r13 |= r4;          /* check clipping */
            r4 &= 0xFF00FF;     /* mask */

            /* output pixels 1 and 3 */
            r5 = p_ref[4];      /* i */
            r6 = (r5 << 16);
            r5 = r6 | (r2 >> 16);/* 0,i,0,g */
            r5 += r1;           /* d+i, b+g */ /* r5 not free */
            r1 >>= 16;
            r1 |= (int32)((uint32)r3 << 16); /* 0,f,0,d */ /* r1 has changed */
            r1 += r2;           /* f+g, d+e */
            r5 += 20 * r1;      /* d+20f+20g+i, b+20d+20e+g */
            r0 >>= 16;
            r0 |= (int32)((uint32)r2 << 16); /* 0,e,0,c */ /* r0 has changed */
            r0 += r3;           /* e+h, c+f */
            r5 += 0x100010;     /* 16,16 */
            r5 -= r0 * 5;       /* d-5e+20f+20g-5h+i, b-5c+20d+20e-5f+g */
            r5 >>= 5;
            r13 |= r5;          /* check clipping */
            r5 &= 0xFF00FF;     /* mask */

            /* interleave: byte0 = pixel 0, byte1 = pixel 1, ... */
            r4 |= (int32)((uint32)r5 << 8);
            *p_tmp++ = r4;
            r1 = r3;            /* slide the window by four pixels */
            r0 = r2;
        }
        p_tmp += ((24 - blkwidth) >> 2); /* move to the next line */
        p_ref += ref_offset;             /* ref_offset = inpitch-blkwidth; */

        if (r13 & 0xFF000700) /* need clipping */
        {
            /* move back to the beginning of the line */
            p_ref -= (ref_offset + blkwidth);   /* input */
            p_tmp -= 6;                         /* intermediate output: 24 bytes = 6 words */
            tmp = p_ref + blkwidth;
            while (p_ref < tmp)
            {
                r0 = *p_ref++;
                r1 = *p_ref++;
                r2 = *p_ref++;
                r3 = *p_ref++;
                r4 = *p_ref++;
                /* first pixel */
                r5 = *p_ref++;
                result = (r0 + r5);
                r0 = (r1 + r4);
                result -= (r0 * 5);
                r0 = (r2 + r3);
                result += (r0 * 20);
                result = (result + 16) >> 5;
                CLIP_RESULT(result)
                pkres = (uint32)result;
                /* second pixel */
                r0 = *p_ref++;
                result = (r1 + r0);
                r1 = (r2 + r5);
                result -= (r1 * 5);
                r1 = (r3 + r4);
                result += (r1 * 20);
                result = (result + 16) >> 5;
                CLIP_RESULT(result)
                pkres |= ((uint32)result << 8);
                /* third pixel */
                r1 = *p_ref++;
                result = (r2 + r1);
                r2 = (r3 + r0);
                result -= (r2 * 5);
                r2 = (r4 + r5);
                result += (r2 * 20);
                result = (result + 16) >> 5;
                CLIP_RESULT(result)
                pkres |= ((uint32)result << 16);
                /* fourth pixel */
                r2 = *p_ref++;
                result = (r3 + r2);
                r3 = (r4 + r1);
                result -= (r3 * 5);
                r3 = (r5 + r0);
                result += (r3 * 20);
                result = (result + 16) >> 5;
                CLIP_RESULT(result)
                /* shift in unsigned: a clipped value >= 0x80 would overflow int */
                pkres |= ((uint32)result << 24);

                *p_tmp++ = pkres;   /* write 4 pixel */
                p_ref -= 5;         /* nine bytes were read; net advance is four */
            }
            p_tmp += ((24 - blkwidth) >> 2); /* move to the next line */
            p_ref += ref_offset;             /* ref_offset = inpitch-blkwidth; */
        }
    }

    /* ---------------- vertical interpolation ---------------- */
    /* The packed path loads in2 as 32-bit words.  When in2 is not
     * word-aligned the rows are first copied into tmp_in.
     * NOTE(review): presumably eCreateAlign copies blkheight + 5 rows,
     * starting two rows above in2, with a destination pitch of 24 - the
     * assignments below rely on that; confirm against eCreateAlign. */
    if (((intptr_t)in2)&0x3)
    {
        eCreateAlign(in2, inpitch, -2, &tmp_in[0][0], blkwidth, blkheight + 5);
        in2 = &tmp_in[2][0];
        inpitch = 24;
    }

    p_cur = out;
    curr_offset = 1 - outpitch * (blkheight - 1); /* offset vertically up and one pixel right */
    pkres = blkheight * inpitch; /* reuse it for limit */

    curr_offset += 3;            /* packed path advances four pixels per column group */

    for (j = 0; j < blkwidth; j += 4, in2 += 4)
    {
        r13 = 0;
        p_ref = in2;
        p_tmp8 = &(tmp_res[0][j]); /* intermediate result */
        p_tmp8 -= 24;       /* compensate for the first offset */
        p_cur -= outpitch;  /* compensate for the first offset */
        tmp = p_ref + pkres; /* limit */
        while (p_ref < tmp)  /* one output row (four pixels) per iteration */
        {
            /* r0/r6 hold the even/odd pixels of the four-pixel group */
            r0 = *((uint32*)(p_ref - (inpitch << 1))); /* row -2 */
            p_ref += inpitch;
            r6 = (r0 >> 8) & 0xFF00FF; /* second and fourth byte */
            r0 &= 0xFF00FF;

            r1 = *((uint32*)(p_ref + (inpitch << 1)));  /* row +3 */
            r7 = (r1 >> 8) & 0xFF00FF;
            r1 &= 0xFF00FF;

            r0 += r1;
            r6 += r7;

            r2 = *((uint32*)p_ref); /* row +1 */
            r8 = (r2 >> 8) & 0xFF00FF;
            r2 &= 0xFF00FF;

            r1 = *((uint32*)(p_ref - inpitch)); /* row 0 */
            r7 = (r1 >> 8) & 0xFF00FF;
            r1 &= 0xFF00FF;
            r1 += r2;

            r7 += r8;

            r0 += 20 * r1;
            r6 += 20 * r7;
            r0 += 0x100010;
            r6 += 0x100010;

            r2 = *((uint32*)(p_ref - (inpitch << 1))); /* row -1 */
            r8 = (r2 >> 8) & 0xFF00FF;
            r2 &= 0xFF00FF;

            r1 = *((uint32*)(p_ref + inpitch)); /* row +2 */
            r7 = (r1 >> 8) & 0xFF00FF;
            r1 &= 0xFF00FF;
            r1 += r2;

            r7 += r8;

            r0 -= 5 * r1;
            r6 -= 5 * r7;

            r0 >>= 5;
            r6 >>= 5;
            /* remember whether anything needs clipping */
            r13 |= r6;
            r13 |= r0;

            /* add with horizontal results */
            r10 = *((uint32*)(p_tmp8 += 24));
            r9 = (r10 >> 8) & 0xFF00FF;
            r10 &= 0xFF00FF;

            r0 += r10;
            r0 += 0x10001;
            r0 = (r0 >> 1) & 0xFF00FF;   /* keep 8 bits per lane */

            r6 += r9;
            r6 += 0x10001;
            r6 = (r6 >> 1) & 0xFF00FF;   /* keep 8 bits per lane */

            r0 |= (int32)((uint32)r6 << 8);  /* pack it back */
            *((uint32*)(p_cur += outpitch)) = r0;
        }
        p_cur += curr_offset; /* back to the top row, four pixels to the right */
        if (r13 & 0xFF000700) /* this column need clipping */
        {
            p_cur -= 4;       /* redo the four columns just written */
            for (i = 0; i < 4; i++)
            {
                p_ref = in2 + i;
                p_tmp8 = &(tmp_res[0][j+i]); /* intermediate result */
                p_tmp8 -= 24;       /* compensate for the first offset */
                p_cur -= outpitch;  /* compensate for the first offset */
                tmp = p_ref + pkres; /* limit */
                while (p_ref < tmp)  /* four output rows per iteration */
                {
                    r0 = *(p_ref - (inpitch << 1));
                    r1 = *(p_ref - inpitch);
                    r2 = *p_ref;
                    r3 = *(p_ref += inpitch);  /* modify pointer before loading */
                    r4 = *(p_ref += inpitch);
                    /* first pixel */
                    r5 = *(p_ref += inpitch);
                    result = (r0 + r5);
                    r0 = (r1 + r4);
                    result -= (r0 * 5);
                    r0 = (r2 + r3);
                    result += (r0 * 20);
                    result = (result + 16) >> 5;
                    CLIP_RESULT(result)
                    tmp_result = *(p_tmp8 += 24);  /* modify pointer before loading */
                    result = (result + tmp_result + 1);  /* no clip */
                    result = (result >> 1);
                    *(p_cur += outpitch) = result;
                    /* second pixel */
                    r0 = *(p_ref += inpitch);
                    result = (r1 + r0);
                    r1 = (r2 + r5);
                    result -= (r1 * 5);
                    r1 = (r3 + r4);
                    result += (r1 * 20);
                    result = (result + 16) >> 5;
                    CLIP_RESULT(result)
                    tmp_result = *(p_tmp8 += 24);  /* intermediate result */
                    result = (result + tmp_result + 1);  /* no clip */
                    result = (result >> 1);
                    *(p_cur += outpitch) = result;
                    /* third pixel */
                    r1 = *(p_ref += inpitch);
                    result = (r2 + r1);
                    r2 = (r3 + r0);
                    result -= (r2 * 5);
                    r2 = (r4 + r5);
                    result += (r2 * 20);
                    result = (result + 16) >> 5;
                    CLIP_RESULT(result)
                    tmp_result = *(p_tmp8 += 24);  /* intermediate result */
                    result = (result + tmp_result + 1);  /* no clip */
                    result = (result >> 1);
                    *(p_cur += outpitch) = result;
                    /* fourth pixel */
                    r2 = *(p_ref += inpitch);
                    result = (r3 + r2);
                    r3 = (r4 + r1);
                    result -= (r3 * 5);
                    r3 = (r5 + r0);
                    result += (r3 * 20);
                    result = (result + 16) >> 5;
                    CLIP_RESULT(result)
                    tmp_result = *(p_tmp8 += 24);  /* intermediate result */
                    result = (result + tmp_result + 1);  /* no clip */
                    result = (result >> 1);
                    *(p_cur += outpitch) = result;
                    p_ref -= (inpitch << 1);  /* move back to center of the filter of the next one */
                }
                p_cur += (curr_offset - 3);   /* top row, one pixel to the right */
            }
        }
    }

    return ;
}
/*
 * eFullPelMC  (integer-pel position "G")
 *
 * Copies a blkwidth x blkheight block of pixels from the reference picture
 * to the prediction buffer without any interpolation.
 *
 * Parameters:
 *   in         first pixel of the source block
 *   inpitch    distance in bytes between successive source rows
 *   out        first pixel of the destination block
 *   outpitch   distance in bytes between successive destination rows
 *   blkwidth   block width in pixels
 *   blkheight  block height in pixels
 *
 * Pixels are copied in groups of four, so blkwidth is expected to be a
 * multiple of 4; for any other width each row is rounded up to the next
 * multiple of 4 pixels.
 *
 * The copy is done byte by byte, so neither in nor out needs any particular
 * alignment and the result does not depend on the host byte order.
 */
void eFullPelMC(uint8 *in, int inpitch, uint8 *out, int outpitch,
                int blkwidth, int blkheight)
{
    int i, j;
    int offset_in = inpitch - blkwidth;     /* from end of one row to start of next */
    int offset_out = outpitch - blkwidth;

    for (j = blkheight; j > 0; j--)
    {
        for (i = blkwidth; i > 0; i -= 4)
        {
            out[0] = in[0];
            out[1] = in[1];
            out[2] = in[2];
            out[3] = in[3];
            in += 4;
            out += 4;
        }
        out += offset_out;
        in += offset_in;
    }
    return ;
}
170329a84457aed4c45bc900998b5e11c03023264208James Dong int offset, j; 170429a84457aed4c45bc900998b5e11c03023264208James Dong 170529a84457aed4c45bc900998b5e11c03023264208James Dong 170629a84457aed4c45bc900998b5e11c03023264208James Dong pad_height = 8 + ((y_pos & 7) ? 1 : 0); 170729a84457aed4c45bc900998b5e11c03023264208James Dong pad_width = 8 + ((x_pos & 7) ? 1 : 0); 170829a84457aed4c45bc900998b5e11c03023264208James Dong 170929a84457aed4c45bc900998b5e11c03023264208James Dong y_pos >>= 3; 171029a84457aed4c45bc900998b5e11c03023264208James Dong x_pos >>= 3; 171129a84457aed4c45bc900998b5e11c03023264208James Dong // pad vertical first 171229a84457aed4c45bc900998b5e11c03023264208James Dong if (y_pos < 0) // need to pad up 171329a84457aed4c45bc900998b5e11c03023264208James Dong { 171429a84457aed4c45bc900998b5e11c03023264208James Dong if (x_pos < -8) start = ref - 8; 171529a84457aed4c45bc900998b5e11c03023264208James Dong else if (x_pos + pad_width > picwidth + 7) start = ref + picwidth + 7 - pad_width; 171629a84457aed4c45bc900998b5e11c03023264208James Dong else start = ref + x_pos; 171729a84457aed4c45bc900998b5e11c03023264208James Dong 171829a84457aed4c45bc900998b5e11c03023264208James Dong /* word-align start */ 17194b43b41eaf8c4c80f66185e13620cf94b8b2ef5bMartin Storsjo offset = (intptr_t)start & 0x3; 172029a84457aed4c45bc900998b5e11c03023264208James Dong if (offset) start -= offset; 172129a84457aed4c45bc900998b5e11c03023264208James Dong 172229a84457aed4c45bc900998b5e11c03023264208James Dong word1 = *((uint32*)start); 172329a84457aed4c45bc900998b5e11c03023264208James Dong word2 = *((uint32*)(start + 4)); 172429a84457aed4c45bc900998b5e11c03023264208James Dong word3 = *((uint32*)(start + 8)); 172529a84457aed4c45bc900998b5e11c03023264208James Dong 172629a84457aed4c45bc900998b5e11c03023264208James Dong /* pad up N rows */ 172729a84457aed4c45bc900998b5e11c03023264208James Dong j = -y_pos; 172829a84457aed4c45bc900998b5e11c03023264208James Dong if (j > 8) j = 8; 
172929a84457aed4c45bc900998b5e11c03023264208James Dong while (j--) 173029a84457aed4c45bc900998b5e11c03023264208James Dong { 173129a84457aed4c45bc900998b5e11c03023264208James Dong *((uint32*)(start -= picpitch)) = word1; 173229a84457aed4c45bc900998b5e11c03023264208James Dong *((uint32*)(start + 4)) = word2; 173329a84457aed4c45bc900998b5e11c03023264208James Dong *((uint32*)(start + 8)) = word3; 173429a84457aed4c45bc900998b5e11c03023264208James Dong } 173529a84457aed4c45bc900998b5e11c03023264208James Dong 173629a84457aed4c45bc900998b5e11c03023264208James Dong } 173729a84457aed4c45bc900998b5e11c03023264208James Dong else if (y_pos + pad_height >= picheight) /* pad down */ 173829a84457aed4c45bc900998b5e11c03023264208James Dong { 173929a84457aed4c45bc900998b5e11c03023264208James Dong if (x_pos < -8) start = ref + picpitch * (picheight - 1) - 8; 174029a84457aed4c45bc900998b5e11c03023264208James Dong else if (x_pos + pad_width > picwidth + 7) start = ref + picpitch * (picheight - 1) + 174129a84457aed4c45bc900998b5e11c03023264208James Dong picwidth + 7 - pad_width; 174229a84457aed4c45bc900998b5e11c03023264208James Dong else start = ref + picpitch * (picheight - 1) + x_pos; 174329a84457aed4c45bc900998b5e11c03023264208James Dong 174429a84457aed4c45bc900998b5e11c03023264208James Dong /* word-align start */ 17454b43b41eaf8c4c80f66185e13620cf94b8b2ef5bMartin Storsjo offset = (intptr_t)start & 0x3; 174629a84457aed4c45bc900998b5e11c03023264208James Dong if (offset) start -= offset; 174729a84457aed4c45bc900998b5e11c03023264208James Dong 174829a84457aed4c45bc900998b5e11c03023264208James Dong word1 = *((uint32*)start); 174929a84457aed4c45bc900998b5e11c03023264208James Dong word2 = *((uint32*)(start + 4)); 175029a84457aed4c45bc900998b5e11c03023264208James Dong word3 = *((uint32*)(start + 8)); 175129a84457aed4c45bc900998b5e11c03023264208James Dong 175229a84457aed4c45bc900998b5e11c03023264208James Dong /* pad down N rows */ 175329a84457aed4c45bc900998b5e11c03023264208James Dong j = 
y_pos + pad_height - picheight; 175429a84457aed4c45bc900998b5e11c03023264208James Dong if (j > 8) j = 8; 175529a84457aed4c45bc900998b5e11c03023264208James Dong while (j--) 175629a84457aed4c45bc900998b5e11c03023264208James Dong { 175729a84457aed4c45bc900998b5e11c03023264208James Dong *((uint32*)(start += picpitch)) = word1; 175829a84457aed4c45bc900998b5e11c03023264208James Dong *((uint32*)(start + 4)) = word2; 175929a84457aed4c45bc900998b5e11c03023264208James Dong *((uint32*)(start + 8)) = word3; 176029a84457aed4c45bc900998b5e11c03023264208James Dong } 176129a84457aed4c45bc900998b5e11c03023264208James Dong } 176229a84457aed4c45bc900998b5e11c03023264208James Dong 176329a84457aed4c45bc900998b5e11c03023264208James Dong /* now pad horizontal */ 176429a84457aed4c45bc900998b5e11c03023264208James Dong if (x_pos < 0) // pad left 176529a84457aed4c45bc900998b5e11c03023264208James Dong { 176629a84457aed4c45bc900998b5e11c03023264208James Dong if (y_pos < -8) start = ref - (picpitch << 3); 176729a84457aed4c45bc900998b5e11c03023264208James Dong else if (y_pos + pad_height > picheight + 7) start = ref + (picheight + 7 - pad_height) * picpitch; 176829a84457aed4c45bc900998b5e11c03023264208James Dong else start = ref + y_pos * picpitch; 176929a84457aed4c45bc900998b5e11c03023264208James Dong 177029a84457aed4c45bc900998b5e11c03023264208James Dong // now pad left 8 pixels for pad_height rows */ 177129a84457aed4c45bc900998b5e11c03023264208James Dong j = pad_height; 177229a84457aed4c45bc900998b5e11c03023264208James Dong start -= picpitch; 177329a84457aed4c45bc900998b5e11c03023264208James Dong while (j--) 177429a84457aed4c45bc900998b5e11c03023264208James Dong { 177529a84457aed4c45bc900998b5e11c03023264208James Dong word1 = *(start += picpitch); 177629a84457aed4c45bc900998b5e11c03023264208James Dong word1 |= (word1 << 8); 177729a84457aed4c45bc900998b5e11c03023264208James Dong word1 |= (word1 << 16); 177829a84457aed4c45bc900998b5e11c03023264208James Dong *((uint32*)(start - 8)) = word1; 
177929a84457aed4c45bc900998b5e11c03023264208James Dong *((uint32*)(start - 4)) = word1; 178029a84457aed4c45bc900998b5e11c03023264208James Dong } 178129a84457aed4c45bc900998b5e11c03023264208James Dong } 178229a84457aed4c45bc900998b5e11c03023264208James Dong else if (x_pos + pad_width >= picwidth) /* pad right */ 178329a84457aed4c45bc900998b5e11c03023264208James Dong { 178429a84457aed4c45bc900998b5e11c03023264208James Dong if (y_pos < -8) start = ref - (picpitch << 3) + picwidth - 1; 178529a84457aed4c45bc900998b5e11c03023264208James Dong else if (y_pos + pad_height > picheight + 7) start = ref + (picheight + 7 - pad_height) * picpitch + picwidth - 1; 178629a84457aed4c45bc900998b5e11c03023264208James Dong else start = ref + y_pos * picpitch + picwidth - 1; 178729a84457aed4c45bc900998b5e11c03023264208James Dong 178829a84457aed4c45bc900998b5e11c03023264208James Dong // now pad right 8 pixels for pad_height rows */ 178929a84457aed4c45bc900998b5e11c03023264208James Dong j = pad_height; 179029a84457aed4c45bc900998b5e11c03023264208James Dong start -= picpitch; 179129a84457aed4c45bc900998b5e11c03023264208James Dong while (j--) 179229a84457aed4c45bc900998b5e11c03023264208James Dong { 179329a84457aed4c45bc900998b5e11c03023264208James Dong word1 = *(start += picpitch); 179429a84457aed4c45bc900998b5e11c03023264208James Dong word1 |= (word1 << 8); 179529a84457aed4c45bc900998b5e11c03023264208James Dong word1 |= (word1 << 16); 179629a84457aed4c45bc900998b5e11c03023264208James Dong *((uint32*)(start + 1)) = word1; 179729a84457aed4c45bc900998b5e11c03023264208James Dong *((uint32*)(start + 5)) = word1; 179829a84457aed4c45bc900998b5e11c03023264208James Dong } 179929a84457aed4c45bc900998b5e11c03023264208James Dong } 180029a84457aed4c45bc900998b5e11c03023264208James Dong 180129a84457aed4c45bc900998b5e11c03023264208James Dong return ; 180229a84457aed4c45bc900998b5e11c03023264208James Dong} 180329a84457aed4c45bc900998b5e11c03023264208James Dong 
180429a84457aed4c45bc900998b5e11c03023264208James Dong 180529a84457aed4c45bc900998b5e11c03023264208James Dongvoid eChromaMotionComp(uint8 *ref, int picwidth, int picheight, 180629a84457aed4c45bc900998b5e11c03023264208James Dong int x_pos, int y_pos, 180729a84457aed4c45bc900998b5e11c03023264208James Dong uint8 *pred, int picpitch, 180829a84457aed4c45bc900998b5e11c03023264208James Dong int blkwidth, int blkheight) 180929a84457aed4c45bc900998b5e11c03023264208James Dong{ 181029a84457aed4c45bc900998b5e11c03023264208James Dong int dx, dy; 181129a84457aed4c45bc900998b5e11c03023264208James Dong int offset_dx, offset_dy; 181229a84457aed4c45bc900998b5e11c03023264208James Dong int index; 181329a84457aed4c45bc900998b5e11c03023264208James Dong 181429a84457aed4c45bc900998b5e11c03023264208James Dong ePadChroma(ref, picwidth, picheight, picpitch, x_pos, y_pos); 181529a84457aed4c45bc900998b5e11c03023264208James Dong 181629a84457aed4c45bc900998b5e11c03023264208James Dong dx = x_pos & 7; 181729a84457aed4c45bc900998b5e11c03023264208James Dong dy = y_pos & 7; 181829a84457aed4c45bc900998b5e11c03023264208James Dong offset_dx = (dx + 7) >> 3; 181929a84457aed4c45bc900998b5e11c03023264208James Dong offset_dy = (dy + 7) >> 3; 182029a84457aed4c45bc900998b5e11c03023264208James Dong x_pos = x_pos >> 3; /* round it to full-pel resolution */ 182129a84457aed4c45bc900998b5e11c03023264208James Dong y_pos = y_pos >> 3; 182229a84457aed4c45bc900998b5e11c03023264208James Dong 182329a84457aed4c45bc900998b5e11c03023264208James Dong ref += y_pos * picpitch + x_pos; 182429a84457aed4c45bc900998b5e11c03023264208James Dong 182529a84457aed4c45bc900998b5e11c03023264208James Dong index = offset_dx + (offset_dy << 1) + ((blkwidth << 1) & 0x7); 182629a84457aed4c45bc900998b5e11c03023264208James Dong 182729a84457aed4c45bc900998b5e11c03023264208James Dong (*(eChromaMC_SIMD[index]))(ref, picpitch , dx, dy, pred, picpitch, blkwidth, blkheight); 182829a84457aed4c45bc900998b5e11c03023264208James Dong return ; 
182929a84457aed4c45bc900998b5e11c03023264208James Dong} 183029a84457aed4c45bc900998b5e11c03023264208James Dong 183129a84457aed4c45bc900998b5e11c03023264208James Dong 183229a84457aed4c45bc900998b5e11c03023264208James Dong/* SIMD routines, unroll the loops in vertical direction, decreasing loops (things to be done) */ 183329a84457aed4c45bc900998b5e11c03023264208James Dongvoid eChromaDiagonalMC_SIMD(uint8 *pRef, int srcPitch, int dx, int dy, 183429a84457aed4c45bc900998b5e11c03023264208James Dong uint8 *pOut, int predPitch, int blkwidth, int blkheight) 183529a84457aed4c45bc900998b5e11c03023264208James Dong{ 183629a84457aed4c45bc900998b5e11c03023264208James Dong int32 r0, r1, r2, r3, result0, result1; 183729a84457aed4c45bc900998b5e11c03023264208James Dong uint8 temp[288]; 183829a84457aed4c45bc900998b5e11c03023264208James Dong uint8 *ref, *out; 183929a84457aed4c45bc900998b5e11c03023264208James Dong int i, j; 184029a84457aed4c45bc900998b5e11c03023264208James Dong int dx_8 = 8 - dx; 184129a84457aed4c45bc900998b5e11c03023264208James Dong int dy_8 = 8 - dy; 184229a84457aed4c45bc900998b5e11c03023264208James Dong 184329a84457aed4c45bc900998b5e11c03023264208James Dong /* horizontal first */ 184429a84457aed4c45bc900998b5e11c03023264208James Dong out = temp; 184529a84457aed4c45bc900998b5e11c03023264208James Dong for (i = 0; i < blkheight + 1; i++) 184629a84457aed4c45bc900998b5e11c03023264208James Dong { 184729a84457aed4c45bc900998b5e11c03023264208James Dong ref = pRef; 184829a84457aed4c45bc900998b5e11c03023264208James Dong r0 = ref[0]; 184929a84457aed4c45bc900998b5e11c03023264208James Dong for (j = 0; j < blkwidth; j += 4) 185029a84457aed4c45bc900998b5e11c03023264208James Dong { 185129a84457aed4c45bc900998b5e11c03023264208James Dong r0 |= (ref[2] << 16); 185229a84457aed4c45bc900998b5e11c03023264208James Dong result0 = dx_8 * r0; 185329a84457aed4c45bc900998b5e11c03023264208James Dong 185429a84457aed4c45bc900998b5e11c03023264208James Dong r1 = ref[1] | (ref[3] << 16); 
185529a84457aed4c45bc900998b5e11c03023264208James Dong result0 += dx * r1; 185629a84457aed4c45bc900998b5e11c03023264208James Dong *(int32 *)out = result0; 185729a84457aed4c45bc900998b5e11c03023264208James Dong 185829a84457aed4c45bc900998b5e11c03023264208James Dong result0 = dx_8 * r1; 185929a84457aed4c45bc900998b5e11c03023264208James Dong 186029a84457aed4c45bc900998b5e11c03023264208James Dong r2 = ref[4]; 186129a84457aed4c45bc900998b5e11c03023264208James Dong r0 = r0 >> 16; 186229a84457aed4c45bc900998b5e11c03023264208James Dong r1 = r0 | (r2 << 16); 186329a84457aed4c45bc900998b5e11c03023264208James Dong result0 += dx * r1; 186429a84457aed4c45bc900998b5e11c03023264208James Dong *(int32 *)(out + 16) = result0; 186529a84457aed4c45bc900998b5e11c03023264208James Dong 186629a84457aed4c45bc900998b5e11c03023264208James Dong ref += 4; 186729a84457aed4c45bc900998b5e11c03023264208James Dong out += 4; 186829a84457aed4c45bc900998b5e11c03023264208James Dong r0 = r2; 186929a84457aed4c45bc900998b5e11c03023264208James Dong } 187029a84457aed4c45bc900998b5e11c03023264208James Dong pRef += srcPitch; 187129a84457aed4c45bc900998b5e11c03023264208James Dong out += (32 - blkwidth); 187229a84457aed4c45bc900998b5e11c03023264208James Dong } 187329a84457aed4c45bc900998b5e11c03023264208James Dong 187429a84457aed4c45bc900998b5e11c03023264208James Dong// pRef -= srcPitch*(blkheight+1); 187529a84457aed4c45bc900998b5e11c03023264208James Dong ref = temp; 187629a84457aed4c45bc900998b5e11c03023264208James Dong 187729a84457aed4c45bc900998b5e11c03023264208James Dong for (j = 0; j < blkwidth; j += 4) 187829a84457aed4c45bc900998b5e11c03023264208James Dong { 187929a84457aed4c45bc900998b5e11c03023264208James Dong r0 = *(int32 *)ref; 188029a84457aed4c45bc900998b5e11c03023264208James Dong r1 = *(int32 *)(ref + 16); 188129a84457aed4c45bc900998b5e11c03023264208James Dong ref += 32; 188229a84457aed4c45bc900998b5e11c03023264208James Dong out = pOut; 188329a84457aed4c45bc900998b5e11c03023264208James Dong for (i = 
0; i < (blkheight >> 1); i++) 188429a84457aed4c45bc900998b5e11c03023264208James Dong { 188529a84457aed4c45bc900998b5e11c03023264208James Dong result0 = dy_8 * r0 + 0x00200020; 188629a84457aed4c45bc900998b5e11c03023264208James Dong r2 = *(int32 *)ref; 188729a84457aed4c45bc900998b5e11c03023264208James Dong result0 += dy * r2; 188829a84457aed4c45bc900998b5e11c03023264208James Dong result0 >>= 6; 188929a84457aed4c45bc900998b5e11c03023264208James Dong result0 &= 0x00FF00FF; 189029a84457aed4c45bc900998b5e11c03023264208James Dong r0 = r2; 189129a84457aed4c45bc900998b5e11c03023264208James Dong 189229a84457aed4c45bc900998b5e11c03023264208James Dong result1 = dy_8 * r1 + 0x00200020; 189329a84457aed4c45bc900998b5e11c03023264208James Dong r3 = *(int32 *)(ref + 16); 189429a84457aed4c45bc900998b5e11c03023264208James Dong result1 += dy * r3; 189529a84457aed4c45bc900998b5e11c03023264208James Dong result1 >>= 6; 189629a84457aed4c45bc900998b5e11c03023264208James Dong result1 &= 0x00FF00FF; 189729a84457aed4c45bc900998b5e11c03023264208James Dong r1 = r3; 189829a84457aed4c45bc900998b5e11c03023264208James Dong *(int32 *)out = result0 | (result1 << 8); 189929a84457aed4c45bc900998b5e11c03023264208James Dong out += predPitch; 190029a84457aed4c45bc900998b5e11c03023264208James Dong ref += 32; 190129a84457aed4c45bc900998b5e11c03023264208James Dong 190229a84457aed4c45bc900998b5e11c03023264208James Dong result0 = dy_8 * r0 + 0x00200020; 190329a84457aed4c45bc900998b5e11c03023264208James Dong r2 = *(int32 *)ref; 190429a84457aed4c45bc900998b5e11c03023264208James Dong result0 += dy * r2; 190529a84457aed4c45bc900998b5e11c03023264208James Dong result0 >>= 6; 190629a84457aed4c45bc900998b5e11c03023264208James Dong result0 &= 0x00FF00FF; 190729a84457aed4c45bc900998b5e11c03023264208James Dong r0 = r2; 190829a84457aed4c45bc900998b5e11c03023264208James Dong 190929a84457aed4c45bc900998b5e11c03023264208James Dong result1 = dy_8 * r1 + 0x00200020; 191029a84457aed4c45bc900998b5e11c03023264208James Dong r3 = 
*(int32 *)(ref + 16); 191129a84457aed4c45bc900998b5e11c03023264208James Dong result1 += dy * r3; 191229a84457aed4c45bc900998b5e11c03023264208James Dong result1 >>= 6; 191329a84457aed4c45bc900998b5e11c03023264208James Dong result1 &= 0x00FF00FF; 191429a84457aed4c45bc900998b5e11c03023264208James Dong r1 = r3; 191529a84457aed4c45bc900998b5e11c03023264208James Dong *(int32 *)out = result0 | (result1 << 8); 191629a84457aed4c45bc900998b5e11c03023264208James Dong out += predPitch; 191729a84457aed4c45bc900998b5e11c03023264208James Dong ref += 32; 191829a84457aed4c45bc900998b5e11c03023264208James Dong } 191929a84457aed4c45bc900998b5e11c03023264208James Dong pOut += 4; 192029a84457aed4c45bc900998b5e11c03023264208James Dong ref = temp + 4; /* since it can only iterate twice max */ 192129a84457aed4c45bc900998b5e11c03023264208James Dong } 192229a84457aed4c45bc900998b5e11c03023264208James Dong return; 192329a84457aed4c45bc900998b5e11c03023264208James Dong} 192429a84457aed4c45bc900998b5e11c03023264208James Dong 192529a84457aed4c45bc900998b5e11c03023264208James Dongvoid eChromaHorizontalMC_SIMD(uint8 *pRef, int srcPitch, int dx, int dy, 192629a84457aed4c45bc900998b5e11c03023264208James Dong uint8 *pOut, int predPitch, int blkwidth, int blkheight) 192729a84457aed4c45bc900998b5e11c03023264208James Dong{ 192829a84457aed4c45bc900998b5e11c03023264208James Dong (void)(dy); 192929a84457aed4c45bc900998b5e11c03023264208James Dong 193029a84457aed4c45bc900998b5e11c03023264208James Dong int32 r0, r1, r2, result0, result1; 193129a84457aed4c45bc900998b5e11c03023264208James Dong uint8 *ref, *out; 193229a84457aed4c45bc900998b5e11c03023264208James Dong int i, j; 193329a84457aed4c45bc900998b5e11c03023264208James Dong int dx_8 = 8 - dx; 193429a84457aed4c45bc900998b5e11c03023264208James Dong 193529a84457aed4c45bc900998b5e11c03023264208James Dong /* horizontal first */ 193629a84457aed4c45bc900998b5e11c03023264208James Dong for (i = 0; i < blkheight; i++) 
193729a84457aed4c45bc900998b5e11c03023264208James Dong { 193829a84457aed4c45bc900998b5e11c03023264208James Dong ref = pRef; 193929a84457aed4c45bc900998b5e11c03023264208James Dong out = pOut; 194029a84457aed4c45bc900998b5e11c03023264208James Dong 194129a84457aed4c45bc900998b5e11c03023264208James Dong r0 = ref[0]; 194229a84457aed4c45bc900998b5e11c03023264208James Dong for (j = 0; j < blkwidth; j += 4) 194329a84457aed4c45bc900998b5e11c03023264208James Dong { 194429a84457aed4c45bc900998b5e11c03023264208James Dong r0 |= (ref[2] << 16); 194529a84457aed4c45bc900998b5e11c03023264208James Dong result0 = dx_8 * r0 + 0x00040004; 194629a84457aed4c45bc900998b5e11c03023264208James Dong 194729a84457aed4c45bc900998b5e11c03023264208James Dong r1 = ref[1] | (ref[3] << 16); 194829a84457aed4c45bc900998b5e11c03023264208James Dong result0 += dx * r1; 194929a84457aed4c45bc900998b5e11c03023264208James Dong result0 >>= 3; 195029a84457aed4c45bc900998b5e11c03023264208James Dong result0 &= 0x00FF00FF; 195129a84457aed4c45bc900998b5e11c03023264208James Dong 195229a84457aed4c45bc900998b5e11c03023264208James Dong result1 = dx_8 * r1 + 0x00040004; 195329a84457aed4c45bc900998b5e11c03023264208James Dong 195429a84457aed4c45bc900998b5e11c03023264208James Dong r2 = ref[4]; 195529a84457aed4c45bc900998b5e11c03023264208James Dong r0 = r0 >> 16; 195629a84457aed4c45bc900998b5e11c03023264208James Dong r1 = r0 | (r2 << 16); 195729a84457aed4c45bc900998b5e11c03023264208James Dong result1 += dx * r1; 195829a84457aed4c45bc900998b5e11c03023264208James Dong result1 >>= 3; 195929a84457aed4c45bc900998b5e11c03023264208James Dong result1 &= 0x00FF00FF; 196029a84457aed4c45bc900998b5e11c03023264208James Dong 196129a84457aed4c45bc900998b5e11c03023264208James Dong *(int32 *)out = result0 | (result1 << 8); 196229a84457aed4c45bc900998b5e11c03023264208James Dong 196329a84457aed4c45bc900998b5e11c03023264208James Dong ref += 4; 196429a84457aed4c45bc900998b5e11c03023264208James Dong out += 4; 
196529a84457aed4c45bc900998b5e11c03023264208James Dong r0 = r2; 196629a84457aed4c45bc900998b5e11c03023264208James Dong } 196729a84457aed4c45bc900998b5e11c03023264208James Dong 196829a84457aed4c45bc900998b5e11c03023264208James Dong pRef += srcPitch; 196929a84457aed4c45bc900998b5e11c03023264208James Dong pOut += predPitch; 197029a84457aed4c45bc900998b5e11c03023264208James Dong } 197129a84457aed4c45bc900998b5e11c03023264208James Dong return; 197229a84457aed4c45bc900998b5e11c03023264208James Dong} 197329a84457aed4c45bc900998b5e11c03023264208James Dong 197429a84457aed4c45bc900998b5e11c03023264208James Dongvoid eChromaVerticalMC_SIMD(uint8 *pRef, int srcPitch, int dx, int dy, 197529a84457aed4c45bc900998b5e11c03023264208James Dong uint8 *pOut, int predPitch, int blkwidth, int blkheight) 197629a84457aed4c45bc900998b5e11c03023264208James Dong{ 197729a84457aed4c45bc900998b5e11c03023264208James Dong (void)(dx); 197829a84457aed4c45bc900998b5e11c03023264208James Dong 197929a84457aed4c45bc900998b5e11c03023264208James Dong int32 r0, r1, r2, r3, result0, result1; 198029a84457aed4c45bc900998b5e11c03023264208James Dong int i, j; 198129a84457aed4c45bc900998b5e11c03023264208James Dong uint8 *ref, *out; 198229a84457aed4c45bc900998b5e11c03023264208James Dong int dy_8 = 8 - dy; 198329a84457aed4c45bc900998b5e11c03023264208James Dong /* vertical first */ 198429a84457aed4c45bc900998b5e11c03023264208James Dong for (i = 0; i < blkwidth; i += 4) 198529a84457aed4c45bc900998b5e11c03023264208James Dong { 198629a84457aed4c45bc900998b5e11c03023264208James Dong ref = pRef; 198729a84457aed4c45bc900998b5e11c03023264208James Dong out = pOut; 198829a84457aed4c45bc900998b5e11c03023264208James Dong 198929a84457aed4c45bc900998b5e11c03023264208James Dong r0 = ref[0] | (ref[2] << 16); 199029a84457aed4c45bc900998b5e11c03023264208James Dong r1 = ref[1] | (ref[3] << 16); 199129a84457aed4c45bc900998b5e11c03023264208James Dong ref += srcPitch; 199229a84457aed4c45bc900998b5e11c03023264208James Dong for (j = 0; j < 
blkheight; j++) 199329a84457aed4c45bc900998b5e11c03023264208James Dong { 199429a84457aed4c45bc900998b5e11c03023264208James Dong result0 = dy_8 * r0 + 0x00040004; 199529a84457aed4c45bc900998b5e11c03023264208James Dong r2 = ref[0] | (ref[2] << 16); 199629a84457aed4c45bc900998b5e11c03023264208James Dong result0 += dy * r2; 199729a84457aed4c45bc900998b5e11c03023264208James Dong result0 >>= 3; 199829a84457aed4c45bc900998b5e11c03023264208James Dong result0 &= 0x00FF00FF; 199929a84457aed4c45bc900998b5e11c03023264208James Dong r0 = r2; 200029a84457aed4c45bc900998b5e11c03023264208James Dong 200129a84457aed4c45bc900998b5e11c03023264208James Dong result1 = dy_8 * r1 + 0x00040004; 200229a84457aed4c45bc900998b5e11c03023264208James Dong r3 = ref[1] | (ref[3] << 16); 200329a84457aed4c45bc900998b5e11c03023264208James Dong result1 += dy * r3; 200429a84457aed4c45bc900998b5e11c03023264208James Dong result1 >>= 3; 200529a84457aed4c45bc900998b5e11c03023264208James Dong result1 &= 0x00FF00FF; 200629a84457aed4c45bc900998b5e11c03023264208James Dong r1 = r3; 200729a84457aed4c45bc900998b5e11c03023264208James Dong *(int32 *)out = result0 | (result1 << 8); 200829a84457aed4c45bc900998b5e11c03023264208James Dong ref += srcPitch; 200929a84457aed4c45bc900998b5e11c03023264208James Dong out += predPitch; 201029a84457aed4c45bc900998b5e11c03023264208James Dong } 201129a84457aed4c45bc900998b5e11c03023264208James Dong pOut += 4; 201229a84457aed4c45bc900998b5e11c03023264208James Dong pRef += 4; 201329a84457aed4c45bc900998b5e11c03023264208James Dong } 201429a84457aed4c45bc900998b5e11c03023264208James Dong return; 201529a84457aed4c45bc900998b5e11c03023264208James Dong} 201629a84457aed4c45bc900998b5e11c03023264208James Dong 201729a84457aed4c45bc900998b5e11c03023264208James Dongvoid eChromaDiagonalMC2_SIMD(uint8 *pRef, int srcPitch, int dx, int dy, 201829a84457aed4c45bc900998b5e11c03023264208James Dong uint8 *pOut, int predPitch, int blkwidth, int blkheight) 201929a84457aed4c45bc900998b5e11c03023264208James 
Dong{ 202029a84457aed4c45bc900998b5e11c03023264208James Dong (void)(blkwidth); 202129a84457aed4c45bc900998b5e11c03023264208James Dong 202229a84457aed4c45bc900998b5e11c03023264208James Dong int32 r0, r1, temp0, temp1, result; 202329a84457aed4c45bc900998b5e11c03023264208James Dong int32 temp[9]; 202429a84457aed4c45bc900998b5e11c03023264208James Dong int32 *out; 202529a84457aed4c45bc900998b5e11c03023264208James Dong int i, r_temp; 202629a84457aed4c45bc900998b5e11c03023264208James Dong int dy_8 = 8 - dy; 202729a84457aed4c45bc900998b5e11c03023264208James Dong 202829a84457aed4c45bc900998b5e11c03023264208James Dong /* horizontal first */ 202929a84457aed4c45bc900998b5e11c03023264208James Dong out = temp; 203029a84457aed4c45bc900998b5e11c03023264208James Dong for (i = 0; i < blkheight + 1; i++) 203129a84457aed4c45bc900998b5e11c03023264208James Dong { 203229a84457aed4c45bc900998b5e11c03023264208James Dong r_temp = pRef[1]; 203329a84457aed4c45bc900998b5e11c03023264208James Dong temp0 = (pRef[0] << 3) + dx * (r_temp - pRef[0]); 203429a84457aed4c45bc900998b5e11c03023264208James Dong temp1 = (r_temp << 3) + dx * (pRef[2] - r_temp); 203529a84457aed4c45bc900998b5e11c03023264208James Dong r0 = temp0 | (temp1 << 16); 203629a84457aed4c45bc900998b5e11c03023264208James Dong *out++ = r0; 203729a84457aed4c45bc900998b5e11c03023264208James Dong pRef += srcPitch; 203829a84457aed4c45bc900998b5e11c03023264208James Dong } 203929a84457aed4c45bc900998b5e11c03023264208James Dong 204029a84457aed4c45bc900998b5e11c03023264208James Dong pRef -= srcPitch * (blkheight + 1); 204129a84457aed4c45bc900998b5e11c03023264208James Dong 204229a84457aed4c45bc900998b5e11c03023264208James Dong out = temp; 204329a84457aed4c45bc900998b5e11c03023264208James Dong 204429a84457aed4c45bc900998b5e11c03023264208James Dong r0 = *out++; 204529a84457aed4c45bc900998b5e11c03023264208James Dong 204629a84457aed4c45bc900998b5e11c03023264208James Dong for (i = 0; i < blkheight; i++) 204729a84457aed4c45bc900998b5e11c03023264208James 
Dong { 204829a84457aed4c45bc900998b5e11c03023264208James Dong result = dy_8 * r0 + 0x00200020; 204929a84457aed4c45bc900998b5e11c03023264208James Dong r1 = *out++; 205029a84457aed4c45bc900998b5e11c03023264208James Dong result += dy * r1; 205129a84457aed4c45bc900998b5e11c03023264208James Dong result >>= 6; 205229a84457aed4c45bc900998b5e11c03023264208James Dong result &= 0x00FF00FF; 205329a84457aed4c45bc900998b5e11c03023264208James Dong *(int16 *)pOut = (result >> 8) | (result & 0xFF); 205429a84457aed4c45bc900998b5e11c03023264208James Dong r0 = r1; 205529a84457aed4c45bc900998b5e11c03023264208James Dong pOut += predPitch; 205629a84457aed4c45bc900998b5e11c03023264208James Dong } 205729a84457aed4c45bc900998b5e11c03023264208James Dong return; 205829a84457aed4c45bc900998b5e11c03023264208James Dong} 205929a84457aed4c45bc900998b5e11c03023264208James Dong 206029a84457aed4c45bc900998b5e11c03023264208James Dongvoid eChromaHorizontalMC2_SIMD(uint8 *pRef, int srcPitch, int dx, int dy, 206129a84457aed4c45bc900998b5e11c03023264208James Dong uint8 *pOut, int predPitch, int blkwidth, int blkheight) 206229a84457aed4c45bc900998b5e11c03023264208James Dong{ 206329a84457aed4c45bc900998b5e11c03023264208James Dong (void)(dy); 206429a84457aed4c45bc900998b5e11c03023264208James Dong (void)(blkwidth); 206529a84457aed4c45bc900998b5e11c03023264208James Dong 206629a84457aed4c45bc900998b5e11c03023264208James Dong int i, temp, temp0, temp1; 206729a84457aed4c45bc900998b5e11c03023264208James Dong 206829a84457aed4c45bc900998b5e11c03023264208James Dong /* horizontal first */ 206929a84457aed4c45bc900998b5e11c03023264208James Dong for (i = 0; i < blkheight; i++) 207029a84457aed4c45bc900998b5e11c03023264208James Dong { 207129a84457aed4c45bc900998b5e11c03023264208James Dong temp = pRef[1]; 207229a84457aed4c45bc900998b5e11c03023264208James Dong temp0 = ((pRef[0] << 3) + dx * (temp - pRef[0]) + 4) >> 3; 207329a84457aed4c45bc900998b5e11c03023264208James Dong temp1 = ((temp << 3) + dx * (pRef[2] - temp) + 4) >> 
3; 207429a84457aed4c45bc900998b5e11c03023264208James Dong 207529a84457aed4c45bc900998b5e11c03023264208James Dong *(int16 *)pOut = temp0 | (temp1 << 8); 207629a84457aed4c45bc900998b5e11c03023264208James Dong pRef += srcPitch; 207729a84457aed4c45bc900998b5e11c03023264208James Dong pOut += predPitch; 207829a84457aed4c45bc900998b5e11c03023264208James Dong 207929a84457aed4c45bc900998b5e11c03023264208James Dong } 208029a84457aed4c45bc900998b5e11c03023264208James Dong return; 208129a84457aed4c45bc900998b5e11c03023264208James Dong} 208229a84457aed4c45bc900998b5e11c03023264208James Dongvoid eChromaVerticalMC2_SIMD(uint8 *pRef, int srcPitch, int dx, int dy, 208329a84457aed4c45bc900998b5e11c03023264208James Dong uint8 *pOut, int predPitch, int blkwidth, int blkheight) 208429a84457aed4c45bc900998b5e11c03023264208James Dong{ 208529a84457aed4c45bc900998b5e11c03023264208James Dong (void)(dx); 208629a84457aed4c45bc900998b5e11c03023264208James Dong (void)(blkwidth); 208729a84457aed4c45bc900998b5e11c03023264208James Dong 208829a84457aed4c45bc900998b5e11c03023264208James Dong int32 r0, r1, result; 208929a84457aed4c45bc900998b5e11c03023264208James Dong int i; 209029a84457aed4c45bc900998b5e11c03023264208James Dong int dy_8 = 8 - dy; 209129a84457aed4c45bc900998b5e11c03023264208James Dong r0 = pRef[0] | (pRef[1] << 16); 209229a84457aed4c45bc900998b5e11c03023264208James Dong pRef += srcPitch; 209329a84457aed4c45bc900998b5e11c03023264208James Dong for (i = 0; i < blkheight; i++) 209429a84457aed4c45bc900998b5e11c03023264208James Dong { 209529a84457aed4c45bc900998b5e11c03023264208James Dong result = dy_8 * r0 + 0x00040004; 209629a84457aed4c45bc900998b5e11c03023264208James Dong r1 = pRef[0] | (pRef[1] << 16); 209729a84457aed4c45bc900998b5e11c03023264208James Dong result += dy * r1; 209829a84457aed4c45bc900998b5e11c03023264208James Dong result >>= 3; 209929a84457aed4c45bc900998b5e11c03023264208James Dong result &= 0x00FF00FF; 210029a84457aed4c45bc900998b5e11c03023264208James Dong *(int16 
*)pOut = (result >> 8) | (result & 0xFF); 210129a84457aed4c45bc900998b5e11c03023264208James Dong r0 = r1; 210229a84457aed4c45bc900998b5e11c03023264208James Dong pRef += srcPitch; 210329a84457aed4c45bc900998b5e11c03023264208James Dong pOut += predPitch; 210429a84457aed4c45bc900998b5e11c03023264208James Dong } 210529a84457aed4c45bc900998b5e11c03023264208James Dong return; 210629a84457aed4c45bc900998b5e11c03023264208James Dong} 210729a84457aed4c45bc900998b5e11c03023264208James Dong 210829a84457aed4c45bc900998b5e11c03023264208James Dongvoid eChromaFullMC_SIMD(uint8 *pRef, int srcPitch, int dx, int dy, 210929a84457aed4c45bc900998b5e11c03023264208James Dong uint8 *pOut, int predPitch, int blkwidth, int blkheight) 211029a84457aed4c45bc900998b5e11c03023264208James Dong{ 211129a84457aed4c45bc900998b5e11c03023264208James Dong (void)(dx); 211229a84457aed4c45bc900998b5e11c03023264208James Dong (void)(dy); 211329a84457aed4c45bc900998b5e11c03023264208James Dong 211429a84457aed4c45bc900998b5e11c03023264208James Dong int i, j; 211529a84457aed4c45bc900998b5e11c03023264208James Dong int offset_in = srcPitch - blkwidth; 211629a84457aed4c45bc900998b5e11c03023264208James Dong int offset_out = predPitch - blkwidth; 211729a84457aed4c45bc900998b5e11c03023264208James Dong uint16 temp; 211829a84457aed4c45bc900998b5e11c03023264208James Dong uint8 byte; 211929a84457aed4c45bc900998b5e11c03023264208James Dong 21204b43b41eaf8c4c80f66185e13620cf94b8b2ef5bMartin Storsjo if (((intptr_t)pRef)&1) 212129a84457aed4c45bc900998b5e11c03023264208James Dong { 212229a84457aed4c45bc900998b5e11c03023264208James Dong for (j = blkheight; j > 0; j--) 212329a84457aed4c45bc900998b5e11c03023264208James Dong { 212429a84457aed4c45bc900998b5e11c03023264208James Dong for (i = blkwidth; i > 0; i -= 2) 212529a84457aed4c45bc900998b5e11c03023264208James Dong { 212629a84457aed4c45bc900998b5e11c03023264208James Dong temp = *pRef++; 212729a84457aed4c45bc900998b5e11c03023264208James Dong byte = *pRef++; 
212829a84457aed4c45bc900998b5e11c03023264208James Dong temp |= (byte << 8); 212929a84457aed4c45bc900998b5e11c03023264208James Dong *((uint16*)pOut) = temp; /* write 2 bytes */ 213029a84457aed4c45bc900998b5e11c03023264208James Dong pOut += 2; 213129a84457aed4c45bc900998b5e11c03023264208James Dong } 213229a84457aed4c45bc900998b5e11c03023264208James Dong pOut += offset_out; 213329a84457aed4c45bc900998b5e11c03023264208James Dong pRef += offset_in; 213429a84457aed4c45bc900998b5e11c03023264208James Dong } 213529a84457aed4c45bc900998b5e11c03023264208James Dong } 213629a84457aed4c45bc900998b5e11c03023264208James Dong else 213729a84457aed4c45bc900998b5e11c03023264208James Dong { 213829a84457aed4c45bc900998b5e11c03023264208James Dong for (j = blkheight; j > 0; j--) 213929a84457aed4c45bc900998b5e11c03023264208James Dong { 214029a84457aed4c45bc900998b5e11c03023264208James Dong for (i = blkwidth; i > 0; i -= 2) 214129a84457aed4c45bc900998b5e11c03023264208James Dong { 214229a84457aed4c45bc900998b5e11c03023264208James Dong temp = *((uint16*)pRef); 214329a84457aed4c45bc900998b5e11c03023264208James Dong *((uint16*)pOut) = temp; 214429a84457aed4c45bc900998b5e11c03023264208James Dong pRef += 2; 214529a84457aed4c45bc900998b5e11c03023264208James Dong pOut += 2; 214629a84457aed4c45bc900998b5e11c03023264208James Dong } 214729a84457aed4c45bc900998b5e11c03023264208James Dong pOut += offset_out; 214829a84457aed4c45bc900998b5e11c03023264208James Dong pRef += offset_in; 214929a84457aed4c45bc900998b5e11c03023264208James Dong } 215029a84457aed4c45bc900998b5e11c03023264208James Dong } 215129a84457aed4c45bc900998b5e11c03023264208James Dong return ; 215229a84457aed4c45bc900998b5e11c03023264208James Dong} 2153