/* ------------------------------------------------------------------
 * Copyright (C) 1998-2009 PacketVideo
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 * -------------------------------------------------------------------
 */
#include <stdint.h>

#include "avcenc_lib.h"
#include "avcenc_int.h"
201cc31e629e8132df390ae692873c847d1c2f62c0James Dong
211cc31e629e8132df390ae692873c847d1c2f62c0James Dong
/*
 * Saturate the integer lvalue x to the range [0, 0xFF] in place.
 *
 * The unsigned comparison catches negative values and values above 0xFF
 * with a single test. Inside, (x >> 31) is all ones for a negative x,
 * which yields 0, and zero otherwise, which yields 0xFF.
 * Assumes x is a 32-bit signed int and that >> on a negative value is an
 * arithmetic shift.
 *
 * The expansion is deliberately a bare if-block: existing call sites invoke
 * the macro without a trailing semicolon. Do not use it as the unbraced
 * body of an if/else. The argument is evaluated more than once, so it must
 * not have side effects.
 */
#define CLIP_RESULT(x)      if ((uint)(x) > 0xFF) { \
                 (x) = 0xFF & (~((x) >> 31)); }
241cc31e629e8132df390ae692873c847d1c2f62c0James Dong
251cc31e629e8132df390ae692873c847d1c2f62c0James Dong/* (blkwidth << 2) + (dy << 1) + dx */
261cc31e629e8132df390ae692873c847d1c2f62c0James Dongstatic void (*const eChromaMC_SIMD[8])(uint8 *, int , int , int , uint8 *, int, int , int) =
271cc31e629e8132df390ae692873c847d1c2f62c0James Dong{
281cc31e629e8132df390ae692873c847d1c2f62c0James Dong    &eChromaFullMC_SIMD,
291cc31e629e8132df390ae692873c847d1c2f62c0James Dong    &eChromaHorizontalMC_SIMD,
301cc31e629e8132df390ae692873c847d1c2f62c0James Dong    &eChromaVerticalMC_SIMD,
311cc31e629e8132df390ae692873c847d1c2f62c0James Dong    &eChromaDiagonalMC_SIMD,
321cc31e629e8132df390ae692873c847d1c2f62c0James Dong    &eChromaFullMC_SIMD,
331cc31e629e8132df390ae692873c847d1c2f62c0James Dong    &eChromaHorizontalMC2_SIMD,
341cc31e629e8132df390ae692873c847d1c2f62c0James Dong    &eChromaVerticalMC2_SIMD,
351cc31e629e8132df390ae692873c847d1c2f62c0James Dong    &eChromaDiagonalMC2_SIMD
361cc31e629e8132df390ae692873c847d1c2f62c0James Dong};
371cc31e629e8132df390ae692873c847d1c2f62c0James Dong/* Perform motion prediction and compensation with residue if exist. */
381cc31e629e8132df390ae692873c847d1c2f62c0James Dongvoid AVCMBMotionComp(AVCEncObject *encvid, AVCCommonObj *video)
391cc31e629e8132df390ae692873c847d1c2f62c0James Dong{
401cc31e629e8132df390ae692873c847d1c2f62c0James Dong    (void)(encvid);
411cc31e629e8132df390ae692873c847d1c2f62c0James Dong
421cc31e629e8132df390ae692873c847d1c2f62c0James Dong    AVCMacroblock *currMB = video->currMB;
431cc31e629e8132df390ae692873c847d1c2f62c0James Dong    AVCPictureData *currPic = video->currPic;
441cc31e629e8132df390ae692873c847d1c2f62c0James Dong    int mbPartIdx, subMbPartIdx;
451cc31e629e8132df390ae692873c847d1c2f62c0James Dong    int ref_idx;
461cc31e629e8132df390ae692873c847d1c2f62c0James Dong    int offset_MbPart_indx = 0;
471cc31e629e8132df390ae692873c847d1c2f62c0James Dong    int16 *mv;
481cc31e629e8132df390ae692873c847d1c2f62c0James Dong    uint32 x_pos, y_pos;
491cc31e629e8132df390ae692873c847d1c2f62c0James Dong    uint8 *curL, *curCb, *curCr;
501cc31e629e8132df390ae692873c847d1c2f62c0James Dong    uint8 *ref_l, *ref_Cb, *ref_Cr;
511cc31e629e8132df390ae692873c847d1c2f62c0James Dong    uint8 *predBlock, *predCb, *predCr;
521cc31e629e8132df390ae692873c847d1c2f62c0James Dong    int block_x, block_y, offset_x, offset_y, offsetP, offset;
531cc31e629e8132df390ae692873c847d1c2f62c0James Dong    int x_position = (video->mb_x << 4);
541cc31e629e8132df390ae692873c847d1c2f62c0James Dong    int y_position = (video->mb_y << 4);
551cc31e629e8132df390ae692873c847d1c2f62c0James Dong    int MbHeight, MbWidth, mbPartIdx_X, mbPartIdx_Y, offset_indx;
561cc31e629e8132df390ae692873c847d1c2f62c0James Dong    int picWidth = currPic->width;
571cc31e629e8132df390ae692873c847d1c2f62c0James Dong    int picPitch = currPic->pitch;
581cc31e629e8132df390ae692873c847d1c2f62c0James Dong    int picHeight = currPic->height;
591cc31e629e8132df390ae692873c847d1c2f62c0James Dong    uint32 tmp_word;
601cc31e629e8132df390ae692873c847d1c2f62c0James Dong
611cc31e629e8132df390ae692873c847d1c2f62c0James Dong    tmp_word = y_position * picPitch;
621cc31e629e8132df390ae692873c847d1c2f62c0James Dong    curL = currPic->Sl + tmp_word + x_position;
631cc31e629e8132df390ae692873c847d1c2f62c0James Dong    offset = (tmp_word >> 2) + (x_position >> 1);
641cc31e629e8132df390ae692873c847d1c2f62c0James Dong    curCb = currPic->Scb + offset;
651cc31e629e8132df390ae692873c847d1c2f62c0James Dong    curCr = currPic->Scr + offset;
661cc31e629e8132df390ae692873c847d1c2f62c0James Dong
671cc31e629e8132df390ae692873c847d1c2f62c0James Dong    predBlock = curL;
681cc31e629e8132df390ae692873c847d1c2f62c0James Dong    predCb = curCb;
691cc31e629e8132df390ae692873c847d1c2f62c0James Dong    predCr = curCr;
701cc31e629e8132df390ae692873c847d1c2f62c0James Dong
711cc31e629e8132df390ae692873c847d1c2f62c0James Dong    GetMotionVectorPredictor(video, 1);
721cc31e629e8132df390ae692873c847d1c2f62c0James Dong
731cc31e629e8132df390ae692873c847d1c2f62c0James Dong    for (mbPartIdx = 0; mbPartIdx < currMB->NumMbPart; mbPartIdx++)
741cc31e629e8132df390ae692873c847d1c2f62c0James Dong    {
751cc31e629e8132df390ae692873c847d1c2f62c0James Dong        MbHeight = currMB->SubMbPartHeight[mbPartIdx];
761cc31e629e8132df390ae692873c847d1c2f62c0James Dong        MbWidth = currMB->SubMbPartWidth[mbPartIdx];
771cc31e629e8132df390ae692873c847d1c2f62c0James Dong        mbPartIdx_X = ((mbPartIdx + offset_MbPart_indx) & 1);
781cc31e629e8132df390ae692873c847d1c2f62c0James Dong        mbPartIdx_Y = (mbPartIdx + offset_MbPart_indx) >> 1;
791cc31e629e8132df390ae692873c847d1c2f62c0James Dong        ref_idx = currMB->ref_idx_L0[(mbPartIdx_Y << 1) + mbPartIdx_X];
801cc31e629e8132df390ae692873c847d1c2f62c0James Dong        offset_indx = 0;
811cc31e629e8132df390ae692873c847d1c2f62c0James Dong
821cc31e629e8132df390ae692873c847d1c2f62c0James Dong        ref_l = video->RefPicList0[ref_idx]->Sl;
831cc31e629e8132df390ae692873c847d1c2f62c0James Dong        ref_Cb = video->RefPicList0[ref_idx]->Scb;
841cc31e629e8132df390ae692873c847d1c2f62c0James Dong        ref_Cr = video->RefPicList0[ref_idx]->Scr;
851cc31e629e8132df390ae692873c847d1c2f62c0James Dong
861cc31e629e8132df390ae692873c847d1c2f62c0James Dong        for (subMbPartIdx = 0; subMbPartIdx < currMB->NumSubMbPart[mbPartIdx]; subMbPartIdx++)
871cc31e629e8132df390ae692873c847d1c2f62c0James Dong        {
881cc31e629e8132df390ae692873c847d1c2f62c0James Dong            block_x = (mbPartIdx_X << 1) + ((subMbPartIdx + offset_indx) & 1);
891cc31e629e8132df390ae692873c847d1c2f62c0James Dong            block_y = (mbPartIdx_Y << 1) + (((subMbPartIdx + offset_indx) >> 1) & 1);
901cc31e629e8132df390ae692873c847d1c2f62c0James Dong            mv = (int16*)(currMB->mvL0 + block_x + (block_y << 2));
911cc31e629e8132df390ae692873c847d1c2f62c0James Dong            offset_x = x_position + (block_x << 2);
921cc31e629e8132df390ae692873c847d1c2f62c0James Dong            offset_y = y_position + (block_y << 2);
931cc31e629e8132df390ae692873c847d1c2f62c0James Dong            x_pos = (offset_x << 2) + *mv++;   /*quarter pel */
941cc31e629e8132df390ae692873c847d1c2f62c0James Dong            y_pos = (offset_y << 2) + *mv;   /*quarter pel */
951cc31e629e8132df390ae692873c847d1c2f62c0James Dong
961cc31e629e8132df390ae692873c847d1c2f62c0James Dong            //offset = offset_y * currPic->width;
971cc31e629e8132df390ae692873c847d1c2f62c0James Dong            //offsetC = (offset >> 2) + (offset_x >> 1);
981cc31e629e8132df390ae692873c847d1c2f62c0James Dong            offsetP = (block_y << 2) * picPitch + (block_x << 2);
991cc31e629e8132df390ae692873c847d1c2f62c0James Dong            eLumaMotionComp(ref_l, picPitch, picHeight, x_pos, y_pos,
1001cc31e629e8132df390ae692873c847d1c2f62c0James Dong                            /*comp_Sl + offset + offset_x,*/
1011cc31e629e8132df390ae692873c847d1c2f62c0James Dong                            predBlock + offsetP, picPitch, MbWidth, MbHeight);
1021cc31e629e8132df390ae692873c847d1c2f62c0James Dong
1031cc31e629e8132df390ae692873c847d1c2f62c0James Dong            offsetP = (block_y * picWidth) + (block_x << 1);
1041cc31e629e8132df390ae692873c847d1c2f62c0James Dong            eChromaMotionComp(ref_Cb, picWidth >> 1, picHeight >> 1, x_pos, y_pos,
1051cc31e629e8132df390ae692873c847d1c2f62c0James Dong                              /*comp_Scb +  offsetC,*/
1061cc31e629e8132df390ae692873c847d1c2f62c0James Dong                              predCb + offsetP, picPitch >> 1, MbWidth >> 1, MbHeight >> 1);
1071cc31e629e8132df390ae692873c847d1c2f62c0James Dong            eChromaMotionComp(ref_Cr, picWidth >> 1, picHeight >> 1, x_pos, y_pos,
1081cc31e629e8132df390ae692873c847d1c2f62c0James Dong                              /*comp_Scr +  offsetC,*/
1091cc31e629e8132df390ae692873c847d1c2f62c0James Dong                              predCr + offsetP, picPitch >> 1, MbWidth >> 1, MbHeight >> 1);
1101cc31e629e8132df390ae692873c847d1c2f62c0James Dong
1111cc31e629e8132df390ae692873c847d1c2f62c0James Dong            offset_indx = currMB->SubMbPartWidth[mbPartIdx] >> 3;
1121cc31e629e8132df390ae692873c847d1c2f62c0James Dong        }
1131cc31e629e8132df390ae692873c847d1c2f62c0James Dong        offset_MbPart_indx = currMB->MbPartWidth >> 4;
1141cc31e629e8132df390ae692873c847d1c2f62c0James Dong    }
1151cc31e629e8132df390ae692873c847d1c2f62c0James Dong
1161cc31e629e8132df390ae692873c847d1c2f62c0James Dong    return ;
1171cc31e629e8132df390ae692873c847d1c2f62c0James Dong}
1181cc31e629e8132df390ae692873c847d1c2f62c0James Dong
1191cc31e629e8132df390ae692873c847d1c2f62c0James Dong
1201cc31e629e8132df390ae692873c847d1c2f62c0James Dong/* preform the actual  motion comp here */
1211cc31e629e8132df390ae692873c847d1c2f62c0James Dongvoid eLumaMotionComp(uint8 *ref, int picpitch, int picheight,
1221cc31e629e8132df390ae692873c847d1c2f62c0James Dong                     int x_pos, int y_pos,
1231cc31e629e8132df390ae692873c847d1c2f62c0James Dong                     uint8 *pred, int pred_pitch,
1241cc31e629e8132df390ae692873c847d1c2f62c0James Dong                     int blkwidth, int blkheight)
1251cc31e629e8132df390ae692873c847d1c2f62c0James Dong{
1261cc31e629e8132df390ae692873c847d1c2f62c0James Dong    (void)(picheight);
1271cc31e629e8132df390ae692873c847d1c2f62c0James Dong
1281cc31e629e8132df390ae692873c847d1c2f62c0James Dong    int dx, dy;
1291cc31e629e8132df390ae692873c847d1c2f62c0James Dong    int temp2[21][21]; /* for intermediate results */
1301cc31e629e8132df390ae692873c847d1c2f62c0James Dong    uint8 *ref2;
1311cc31e629e8132df390ae692873c847d1c2f62c0James Dong
1321cc31e629e8132df390ae692873c847d1c2f62c0James Dong    dx = x_pos & 3;
1331cc31e629e8132df390ae692873c847d1c2f62c0James Dong    dy = y_pos & 3;
1341cc31e629e8132df390ae692873c847d1c2f62c0James Dong    x_pos = x_pos >> 2;  /* round it to full-pel resolution */
1351cc31e629e8132df390ae692873c847d1c2f62c0James Dong    y_pos = y_pos >> 2;
1361cc31e629e8132df390ae692873c847d1c2f62c0James Dong
1371cc31e629e8132df390ae692873c847d1c2f62c0James Dong    /* perform actual motion compensation */
1381cc31e629e8132df390ae692873c847d1c2f62c0James Dong    if (dx == 0 && dy == 0)
1391cc31e629e8132df390ae692873c847d1c2f62c0James Dong    {  /* fullpel position *//* G */
1401cc31e629e8132df390ae692873c847d1c2f62c0James Dong
1411cc31e629e8132df390ae692873c847d1c2f62c0James Dong        ref += y_pos * picpitch + x_pos;
1421cc31e629e8132df390ae692873c847d1c2f62c0James Dong
1431cc31e629e8132df390ae692873c847d1c2f62c0James Dong        eFullPelMC(ref, picpitch, pred, pred_pitch, blkwidth, blkheight);
1441cc31e629e8132df390ae692873c847d1c2f62c0James Dong
1451cc31e629e8132df390ae692873c847d1c2f62c0James Dong    }   /* other positions */
1461cc31e629e8132df390ae692873c847d1c2f62c0James Dong    else  if (dy == 0)
1471cc31e629e8132df390ae692873c847d1c2f62c0James Dong    { /* no vertical interpolation *//* a,b,c*/
1481cc31e629e8132df390ae692873c847d1c2f62c0James Dong
1491cc31e629e8132df390ae692873c847d1c2f62c0James Dong        ref += y_pos * picpitch + x_pos;
1501cc31e629e8132df390ae692873c847d1c2f62c0James Dong
1511cc31e629e8132df390ae692873c847d1c2f62c0James Dong        eHorzInterp1MC(ref, picpitch, pred, pred_pitch, blkwidth, blkheight, dx);
1521cc31e629e8132df390ae692873c847d1c2f62c0James Dong    }
1531cc31e629e8132df390ae692873c847d1c2f62c0James Dong    else if (dx == 0)
1541cc31e629e8132df390ae692873c847d1c2f62c0James Dong    { /*no horizontal interpolation *//* d,h,n */
1551cc31e629e8132df390ae692873c847d1c2f62c0James Dong
1561cc31e629e8132df390ae692873c847d1c2f62c0James Dong        ref += y_pos * picpitch + x_pos;
1571cc31e629e8132df390ae692873c847d1c2f62c0James Dong
1581cc31e629e8132df390ae692873c847d1c2f62c0James Dong        eVertInterp1MC(ref, picpitch, pred, pred_pitch, blkwidth, blkheight, dy);
1591cc31e629e8132df390ae692873c847d1c2f62c0James Dong    }
1601cc31e629e8132df390ae692873c847d1c2f62c0James Dong    else if (dy == 2)
1611cc31e629e8132df390ae692873c847d1c2f62c0James Dong    {  /* horizontal cross *//* i, j, k */
1621cc31e629e8132df390ae692873c847d1c2f62c0James Dong
1631cc31e629e8132df390ae692873c847d1c2f62c0James Dong        ref += y_pos * picpitch + x_pos - 2; /* move to the left 2 pixels */
1641cc31e629e8132df390ae692873c847d1c2f62c0James Dong
1651cc31e629e8132df390ae692873c847d1c2f62c0James Dong        eVertInterp2MC(ref, picpitch, &temp2[0][0], 21, blkwidth + 5, blkheight);
1661cc31e629e8132df390ae692873c847d1c2f62c0James Dong
1671cc31e629e8132df390ae692873c847d1c2f62c0James Dong        eHorzInterp2MC(&temp2[0][2], 21, pred, pred_pitch, blkwidth, blkheight, dx);
1681cc31e629e8132df390ae692873c847d1c2f62c0James Dong    }
1691cc31e629e8132df390ae692873c847d1c2f62c0James Dong    else if (dx == 2)
1701cc31e629e8132df390ae692873c847d1c2f62c0James Dong    { /* vertical cross */ /* f,q */
1711cc31e629e8132df390ae692873c847d1c2f62c0James Dong
1721cc31e629e8132df390ae692873c847d1c2f62c0James Dong        ref += (y_pos - 2) * picpitch + x_pos; /* move to up 2 lines */
1731cc31e629e8132df390ae692873c847d1c2f62c0James Dong
1741cc31e629e8132df390ae692873c847d1c2f62c0James Dong        eHorzInterp3MC(ref, picpitch, &temp2[0][0], 21, blkwidth, blkheight + 5);
1751cc31e629e8132df390ae692873c847d1c2f62c0James Dong        eVertInterp3MC(&temp2[2][0], 21, pred, pred_pitch, blkwidth, blkheight, dy);
1761cc31e629e8132df390ae692873c847d1c2f62c0James Dong    }
1771cc31e629e8132df390ae692873c847d1c2f62c0James Dong    else
1781cc31e629e8132df390ae692873c847d1c2f62c0James Dong    { /* diagonal *//* e,g,p,r */
1791cc31e629e8132df390ae692873c847d1c2f62c0James Dong
1801cc31e629e8132df390ae692873c847d1c2f62c0James Dong        ref2 = ref + (y_pos + (dy / 2)) * picpitch + x_pos;
1811cc31e629e8132df390ae692873c847d1c2f62c0James Dong
1821cc31e629e8132df390ae692873c847d1c2f62c0James Dong        ref += (y_pos * picpitch) + x_pos + (dx / 2);
1831cc31e629e8132df390ae692873c847d1c2f62c0James Dong
1841cc31e629e8132df390ae692873c847d1c2f62c0James Dong        eDiagonalInterpMC(ref2, ref, picpitch, pred, pred_pitch, blkwidth, blkheight);
1851cc31e629e8132df390ae692873c847d1c2f62c0James Dong    }
1861cc31e629e8132df390ae692873c847d1c2f62c0James Dong
1871cc31e629e8132df390ae692873c847d1c2f62c0James Dong    return ;
1881cc31e629e8132df390ae692873c847d1c2f62c0James Dong}
1891cc31e629e8132df390ae692873c847d1c2f62c0James Dong
1901cc31e629e8132df390ae692873c847d1c2f62c0James Dongvoid eCreateAlign(uint8 *ref, int picpitch, int y_pos,
1911cc31e629e8132df390ae692873c847d1c2f62c0James Dong                  uint8 *out, int blkwidth, int blkheight)
1921cc31e629e8132df390ae692873c847d1c2f62c0James Dong{
1931cc31e629e8132df390ae692873c847d1c2f62c0James Dong    int i, j;
1941cc31e629e8132df390ae692873c847d1c2f62c0James Dong    int offset, out_offset;
1951cc31e629e8132df390ae692873c847d1c2f62c0James Dong    uint32 prev_pix, result, pix1, pix2, pix4;
1961cc31e629e8132df390ae692873c847d1c2f62c0James Dong
1971cc31e629e8132df390ae692873c847d1c2f62c0James Dong    ref += y_pos * picpitch;// + x_pos;
1981cc31e629e8132df390ae692873c847d1c2f62c0James Dong    out_offset = 24 - blkwidth;
1991cc31e629e8132df390ae692873c847d1c2f62c0James Dong
2001cc31e629e8132df390ae692873c847d1c2f62c0James Dong    //switch(x_pos&0x3){
2011cc31e629e8132df390ae692873c847d1c2f62c0James Dong    switch (((uint32)ref)&0x3)
2021cc31e629e8132df390ae692873c847d1c2f62c0James Dong    {
2031cc31e629e8132df390ae692873c847d1c2f62c0James Dong        case 1:
2041cc31e629e8132df390ae692873c847d1c2f62c0James Dong            offset =  picpitch - blkwidth - 3;
2051cc31e629e8132df390ae692873c847d1c2f62c0James Dong            for (j = 0; j < blkheight; j++)
2061cc31e629e8132df390ae692873c847d1c2f62c0James Dong            {
2071cc31e629e8132df390ae692873c847d1c2f62c0James Dong                pix1 = *ref++;
2081cc31e629e8132df390ae692873c847d1c2f62c0James Dong                pix2 = *((uint16*)ref);
2091cc31e629e8132df390ae692873c847d1c2f62c0James Dong                ref += 2;
2101cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result = (pix2 << 8) | pix1;
2111cc31e629e8132df390ae692873c847d1c2f62c0James Dong
2121cc31e629e8132df390ae692873c847d1c2f62c0James Dong                for (i = 3; i < blkwidth; i += 4)
2131cc31e629e8132df390ae692873c847d1c2f62c0James Dong                {
2141cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    pix4 = *((uint32*)ref);
2151cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    ref += 4;
2161cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    prev_pix = (pix4 << 24) & 0xFF000000; /* mask out byte belong to previous word */
2171cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    result |= prev_pix;
2181cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    *((uint32*)out) = result;  /* write 4 bytes */
2191cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    out += 4;
2201cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    result = pix4 >> 8; /* for the next loop */
2211cc31e629e8132df390ae692873c847d1c2f62c0James Dong                }
2221cc31e629e8132df390ae692873c847d1c2f62c0James Dong                ref += offset;
2231cc31e629e8132df390ae692873c847d1c2f62c0James Dong                out += out_offset;
2241cc31e629e8132df390ae692873c847d1c2f62c0James Dong            }
2251cc31e629e8132df390ae692873c847d1c2f62c0James Dong            break;
2261cc31e629e8132df390ae692873c847d1c2f62c0James Dong        case 2:
2271cc31e629e8132df390ae692873c847d1c2f62c0James Dong            offset =  picpitch - blkwidth - 2;
2281cc31e629e8132df390ae692873c847d1c2f62c0James Dong            for (j = 0; j < blkheight; j++)
2291cc31e629e8132df390ae692873c847d1c2f62c0James Dong            {
2301cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result = *((uint16*)ref);
2311cc31e629e8132df390ae692873c847d1c2f62c0James Dong                ref += 2;
2321cc31e629e8132df390ae692873c847d1c2f62c0James Dong                for (i = 2; i < blkwidth; i += 4)
2331cc31e629e8132df390ae692873c847d1c2f62c0James Dong                {
2341cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    pix4 = *((uint32*)ref);
2351cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    ref += 4;
2361cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    prev_pix = (pix4 << 16) & 0xFFFF0000; /* mask out byte belong to previous word */
2371cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    result |= prev_pix;
2381cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    *((uint32*)out) = result;  /* write 4 bytes */
2391cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    out += 4;
2401cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    result = pix4 >> 16; /* for the next loop */
2411cc31e629e8132df390ae692873c847d1c2f62c0James Dong                }
2421cc31e629e8132df390ae692873c847d1c2f62c0James Dong                ref += offset;
2431cc31e629e8132df390ae692873c847d1c2f62c0James Dong                out += out_offset;
2441cc31e629e8132df390ae692873c847d1c2f62c0James Dong            }
2451cc31e629e8132df390ae692873c847d1c2f62c0James Dong            break;
2461cc31e629e8132df390ae692873c847d1c2f62c0James Dong        case 3:
2471cc31e629e8132df390ae692873c847d1c2f62c0James Dong            offset =  picpitch - blkwidth - 1;
2481cc31e629e8132df390ae692873c847d1c2f62c0James Dong            for (j = 0; j < blkheight; j++)
2491cc31e629e8132df390ae692873c847d1c2f62c0James Dong            {
2501cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result = *ref++;
2511cc31e629e8132df390ae692873c847d1c2f62c0James Dong                for (i = 1; i < blkwidth; i += 4)
2521cc31e629e8132df390ae692873c847d1c2f62c0James Dong                {
2531cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    pix4 = *((uint32*)ref);
2541cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    ref += 4;
2551cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    prev_pix = (pix4 << 8) & 0xFFFFFF00; /* mask out byte belong to previous word */
2561cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    result |= prev_pix;
2571cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    *((uint32*)out) = result;  /* write 4 bytes */
2581cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    out += 4;
2591cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    result = pix4 >> 24; /* for the next loop */
2601cc31e629e8132df390ae692873c847d1c2f62c0James Dong                }
2611cc31e629e8132df390ae692873c847d1c2f62c0James Dong                ref += offset;
2621cc31e629e8132df390ae692873c847d1c2f62c0James Dong                out += out_offset;
2631cc31e629e8132df390ae692873c847d1c2f62c0James Dong            }
2641cc31e629e8132df390ae692873c847d1c2f62c0James Dong            break;
2651cc31e629e8132df390ae692873c847d1c2f62c0James Dong    }
2661cc31e629e8132df390ae692873c847d1c2f62c0James Dong}
2671cc31e629e8132df390ae692873c847d1c2f62c0James Dong
2681cc31e629e8132df390ae692873c847d1c2f62c0James Dongvoid eHorzInterp1MC(uint8 *in, int inpitch, uint8 *out, int outpitch,
2691cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    int blkwidth, int blkheight, int dx)
2701cc31e629e8132df390ae692873c847d1c2f62c0James Dong{
2711cc31e629e8132df390ae692873c847d1c2f62c0James Dong    uint8 *p_ref;
2721cc31e629e8132df390ae692873c847d1c2f62c0James Dong    uint32 *p_cur;
2731cc31e629e8132df390ae692873c847d1c2f62c0James Dong    uint32 tmp, pkres;
2741cc31e629e8132df390ae692873c847d1c2f62c0James Dong    int result, curr_offset, ref_offset;
2751cc31e629e8132df390ae692873c847d1c2f62c0James Dong    int j;
2761cc31e629e8132df390ae692873c847d1c2f62c0James Dong    int32 r0, r1, r2, r3, r4, r5;
2771cc31e629e8132df390ae692873c847d1c2f62c0James Dong    int32 r13, r6;
2781cc31e629e8132df390ae692873c847d1c2f62c0James Dong
2791cc31e629e8132df390ae692873c847d1c2f62c0James Dong    p_cur = (uint32*)out; /* assume it's word aligned */
2801cc31e629e8132df390ae692873c847d1c2f62c0James Dong    curr_offset = (outpitch - blkwidth) >> 2;
2811cc31e629e8132df390ae692873c847d1c2f62c0James Dong    p_ref = in;
2821cc31e629e8132df390ae692873c847d1c2f62c0James Dong    ref_offset = inpitch - blkwidth;
2831cc31e629e8132df390ae692873c847d1c2f62c0James Dong
2841cc31e629e8132df390ae692873c847d1c2f62c0James Dong    if (dx&1)
2851cc31e629e8132df390ae692873c847d1c2f62c0James Dong    {
2861cc31e629e8132df390ae692873c847d1c2f62c0James Dong        dx = ((dx >> 1) ? -3 : -4); /* use in 3/4 pel */
2871cc31e629e8132df390ae692873c847d1c2f62c0James Dong        p_ref -= 2;
2881cc31e629e8132df390ae692873c847d1c2f62c0James Dong        r13 = 0;
2891cc31e629e8132df390ae692873c847d1c2f62c0James Dong        for (j = blkheight; j > 0; j--)
2901cc31e629e8132df390ae692873c847d1c2f62c0James Dong        {
2911cc31e629e8132df390ae692873c847d1c2f62c0James Dong            tmp = (uint32)(p_ref + blkwidth);
2921cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r0 = p_ref[0];
2931cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r1 = p_ref[2];
2941cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r0 |= (r1 << 16);           /* 0,c,0,a */
2951cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r1 = p_ref[1];
2961cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r2 = p_ref[3];
2971cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r1 |= (r2 << 16);           /* 0,d,0,b */
2981cc31e629e8132df390ae692873c847d1c2f62c0James Dong            while ((uint32)p_ref < tmp)
2991cc31e629e8132df390ae692873c847d1c2f62c0James Dong            {
3001cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r2 = *(p_ref += 4); /* move pointer to e */
3011cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r3 = p_ref[2];
3021cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r2 |= (r3 << 16);           /* 0,g,0,e */
3031cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r3 = p_ref[1];
3041cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r4 = p_ref[3];
3051cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r3 |= (r4 << 16);           /* 0,h,0,f */
3061cc31e629e8132df390ae692873c847d1c2f62c0James Dong
3071cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r4 = r0 + r3;       /* c+h, a+f */
3081cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r5 = r0 + r1;   /* c+d, a+b */
3091cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r6 = r2 + r3;   /* g+h, e+f */
3101cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r5 >>= 16;
3111cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r5 |= (r6 << 16);   /* e+f, c+d */
3121cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r4 += r5 * 20;      /* c+20*e+20*f+h, a+20*c+20*d+f */
3131cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r4 += 0x100010; /* +16, +16 */
3141cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r5 = r1 + r2;       /* d+g, b+e */
3151cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r4 -= r5 * 5;       /* c-5*d+20*e+20*f-5*g+h, a-5*b+20*c+20*d-5*e+f */
3161cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r4 >>= 5;
3171cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r13 |= r4;      /* check clipping */
3181cc31e629e8132df390ae692873c847d1c2f62c0James Dong
3191cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r5 = p_ref[dx+2];
3201cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r6 = p_ref[dx+4];
3211cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r5 |= (r6 << 16);
3221cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r4 += r5;
3231cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r4 += 0x10001;
3241cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r4 = (r4 >> 1) & 0xFF00FF;
3251cc31e629e8132df390ae692873c847d1c2f62c0James Dong
3261cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r5 = p_ref[4];  /* i */
3271cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r6 = (r5 << 16);
3281cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r5 = r6 | (r2 >> 16);/* 0,i,0,g */
3291cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r5 += r1;       /* d+i, b+g */ /* r5 not free */
3301cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r1 >>= 16;
3311cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r1 |= (r3 << 16); /* 0,f,0,d */ /* r1 has changed */
3321cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r1 += r2;       /* f+g, d+e */
3331cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r5 += 20 * r1;  /* d+20f+20g+i, b+20d+20e+g */
3341cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r0 >>= 16;
3351cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r0 |= (r2 << 16); /* 0,e,0,c */ /* r0 has changed */
3361cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r0 += r3;       /* e+h, c+f */
3371cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r5 += 0x100010; /* 16,16 */
3381cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r5 -= r0 * 5;       /* d-5e+20f+20g-5h+i, b-5c+20d+20e-5f+g */
3391cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r5 >>= 5;
3401cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r13 |= r5;      /* check clipping */
3411cc31e629e8132df390ae692873c847d1c2f62c0James Dong
3421cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r0 = p_ref[dx+3];
3431cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r1 = p_ref[dx+5];
3441cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r0 |= (r1 << 16);
3451cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r5 += r0;
3461cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r5 += 0x10001;
3471cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r5 = (r5 >> 1) & 0xFF00FF;
3481cc31e629e8132df390ae692873c847d1c2f62c0James Dong
3491cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r4 |= (r5 << 8);    /* pack them together */
3501cc31e629e8132df390ae692873c847d1c2f62c0James Dong                *p_cur++ = r4;
3511cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r1 = r3;
3521cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r0 = r2;
3531cc31e629e8132df390ae692873c847d1c2f62c0James Dong            }
3541cc31e629e8132df390ae692873c847d1c2f62c0James Dong            p_cur += curr_offset; /* move to the next line */
3551cc31e629e8132df390ae692873c847d1c2f62c0James Dong            p_ref += ref_offset;  /*    ref_offset = inpitch-blkwidth; */
3561cc31e629e8132df390ae692873c847d1c2f62c0James Dong
3571cc31e629e8132df390ae692873c847d1c2f62c0James Dong            if (r13&0xFF000700) /* need clipping */
3581cc31e629e8132df390ae692873c847d1c2f62c0James Dong            {
3591cc31e629e8132df390ae692873c847d1c2f62c0James Dong                /* move back to the beginning of the line */
3601cc31e629e8132df390ae692873c847d1c2f62c0James Dong                p_ref -= (ref_offset + blkwidth);   /* input */
3611cc31e629e8132df390ae692873c847d1c2f62c0James Dong                p_cur -= (outpitch >> 2);
3621cc31e629e8132df390ae692873c847d1c2f62c0James Dong
3631cc31e629e8132df390ae692873c847d1c2f62c0James Dong                tmp = (uint32)(p_ref + blkwidth);
3641cc31e629e8132df390ae692873c847d1c2f62c0James Dong                for (; (uint32)p_ref < tmp;)
3651cc31e629e8132df390ae692873c847d1c2f62c0James Dong                {
3661cc31e629e8132df390ae692873c847d1c2f62c0James Dong
3671cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    r0 = *p_ref++;
3681cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    r1 = *p_ref++;
3691cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    r2 = *p_ref++;
3701cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    r3 = *p_ref++;
3711cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    r4 = *p_ref++;
3721cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    /* first pixel */
3731cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    r5 = *p_ref++;
3741cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    result = (r0 + r5);
3751cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    r0 = (r1 + r4);
3761cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    result -= (r0 * 5);//result -= r0;  result -= (r0<<2);
3771cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    r0 = (r2 + r3);
3781cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    result += (r0 * 20);//result += (r0<<4);    result += (r0<<2);
3791cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    result = (result + 16) >> 5;
3801cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    CLIP_RESULT(result)
3811cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    /* 3/4 pel,  no need to clip */
3821cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    result = (result + p_ref[dx] + 1);
3831cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    pkres = (result >> 1) ;
3841cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    /* second pixel */
3851cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    r0 = *p_ref++;
3861cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    result = (r1 + r0);
3871cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    r1 = (r2 + r5);
3881cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    result -= (r1 * 5);//result -= r1;  result -= (r1<<2);
3891cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    r1 = (r3 + r4);
3901cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    result += (r1 * 20);//result += (r1<<4);    result += (r1<<2);
3911cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    result = (result + 16) >> 5;
3921cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    CLIP_RESULT(result)
3931cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    /* 3/4 pel,  no need to clip */
3941cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    result = (result + p_ref[dx] + 1);
3951cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    result = (result >> 1);
3961cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    pkres  |= (result << 8);
3971cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    /* third pixel */
3981cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    r1 = *p_ref++;
3991cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    result = (r2 + r1);
4001cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    r2 = (r3 + r0);
4011cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    result -= (r2 * 5);//result -= r2;  result -= (r2<<2);
4021cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    r2 = (r4 + r5);
4031cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    result += (r2 * 20);//result += (r2<<4);    result += (r2<<2);
4041cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    result = (result + 16) >> 5;
4051cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    CLIP_RESULT(result)
4061cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    /* 3/4 pel,  no need to clip */
4071cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    result = (result + p_ref[dx] + 1);
4081cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    result = (result >> 1);
4091cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    pkres  |= (result << 16);
4101cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    /* fourth pixel */
4111cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    r2 = *p_ref++;
4121cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    result = (r3 + r2);
4131cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    r3 = (r4 + r1);
4141cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    result -= (r3 * 5);//result -= r3;  result -= (r3<<2);
4151cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    r3 = (r5 + r0);
4161cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    result += (r3 * 20);//result += (r3<<4);    result += (r3<<2);
4171cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    result = (result + 16) >> 5;
4181cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    CLIP_RESULT(result)
4191cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    /* 3/4 pel,  no need to clip */
4201cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    result = (result + p_ref[dx] + 1);
4211cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    result = (result >> 1);
4221cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    pkres  |= (result << 24);
4231cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    *p_cur++ = pkres; /* write 4 pixels */
4241cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    p_ref -= 5;  /* offset back to the middle of filter */
4251cc31e629e8132df390ae692873c847d1c2f62c0James Dong                }
4261cc31e629e8132df390ae692873c847d1c2f62c0James Dong                p_cur += curr_offset;  /* move to the next line */
4271cc31e629e8132df390ae692873c847d1c2f62c0James Dong                p_ref += ref_offset;    /* move to the next line */
4281cc31e629e8132df390ae692873c847d1c2f62c0James Dong            }
4291cc31e629e8132df390ae692873c847d1c2f62c0James Dong        }
4301cc31e629e8132df390ae692873c847d1c2f62c0James Dong    }
4311cc31e629e8132df390ae692873c847d1c2f62c0James Dong    else
4321cc31e629e8132df390ae692873c847d1c2f62c0James Dong    {
4331cc31e629e8132df390ae692873c847d1c2f62c0James Dong        p_ref -= 2;
4341cc31e629e8132df390ae692873c847d1c2f62c0James Dong        r13 = 0;
4351cc31e629e8132df390ae692873c847d1c2f62c0James Dong        for (j = blkheight; j > 0; j--)
4361cc31e629e8132df390ae692873c847d1c2f62c0James Dong        {
4371cc31e629e8132df390ae692873c847d1c2f62c0James Dong            tmp = (uint32)(p_ref + blkwidth);
4381cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r0 = p_ref[0];
4391cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r1 = p_ref[2];
4401cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r0 |= (r1 << 16);           /* 0,c,0,a */
4411cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r1 = p_ref[1];
4421cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r2 = p_ref[3];
4431cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r1 |= (r2 << 16);           /* 0,d,0,b */
4441cc31e629e8132df390ae692873c847d1c2f62c0James Dong            while ((uint32)p_ref < tmp)
4451cc31e629e8132df390ae692873c847d1c2f62c0James Dong            {
4461cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r2 = *(p_ref += 4); /* move pointer to e */
4471cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r3 = p_ref[2];
4481cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r2 |= (r3 << 16);           /* 0,g,0,e */
4491cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r3 = p_ref[1];
4501cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r4 = p_ref[3];
4511cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r3 |= (r4 << 16);           /* 0,h,0,f */
4521cc31e629e8132df390ae692873c847d1c2f62c0James Dong
4531cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r4 = r0 + r3;       /* c+h, a+f */
4541cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r5 = r0 + r1;   /* c+d, a+b */
4551cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r6 = r2 + r3;   /* g+h, e+f */
4561cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r5 >>= 16;
4571cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r5 |= (r6 << 16);   /* e+f, c+d */
4581cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r4 += r5 * 20;      /* c+20*e+20*f+h, a+20*c+20*d+f */
4591cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r4 += 0x100010; /* +16, +16 */
4601cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r5 = r1 + r2;       /* d+g, b+e */
4611cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r4 -= r5 * 5;       /* c-5*d+20*e+20*f-5*g+h, a-5*b+20*c+20*d-5*e+f */
4621cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r4 >>= 5;
4631cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r13 |= r4;      /* check clipping */
4641cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r4 &= 0xFF00FF; /* mask */
4651cc31e629e8132df390ae692873c847d1c2f62c0James Dong
4661cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r5 = p_ref[4];  /* i */
4671cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r6 = (r5 << 16);
4681cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r5 = r6 | (r2 >> 16);/* 0,i,0,g */
4691cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r5 += r1;       /* d+i, b+g */ /* r5 not free */
4701cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r1 >>= 16;
4711cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r1 |= (r3 << 16); /* 0,f,0,d */ /* r1 has changed */
4721cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r1 += r2;       /* f+g, d+e */
4731cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r5 += 20 * r1;  /* d+20f+20g+i, b+20d+20e+g */
4741cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r0 >>= 16;
4751cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r0 |= (r2 << 16); /* 0,e,0,c */ /* r0 has changed */
4761cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r0 += r3;       /* e+h, c+f */
4771cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r5 += 0x100010; /* 16,16 */
4781cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r5 -= r0 * 5;       /* d-5e+20f+20g-5h+i, b-5c+20d+20e-5f+g */
4791cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r5 >>= 5;
4801cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r13 |= r5;      /* check clipping */
4811cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r5 &= 0xFF00FF; /* mask */
4821cc31e629e8132df390ae692873c847d1c2f62c0James Dong
4831cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r4 |= (r5 << 8);    /* pack them together */
4841cc31e629e8132df390ae692873c847d1c2f62c0James Dong                *p_cur++ = r4;
4851cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r1 = r3;
4861cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r0 = r2;
4871cc31e629e8132df390ae692873c847d1c2f62c0James Dong            }
4881cc31e629e8132df390ae692873c847d1c2f62c0James Dong            p_cur += curr_offset; /* move to the next line */
4891cc31e629e8132df390ae692873c847d1c2f62c0James Dong            p_ref += ref_offset;  /*    ref_offset = inpitch-blkwidth; */
4901cc31e629e8132df390ae692873c847d1c2f62c0James Dong
4911cc31e629e8132df390ae692873c847d1c2f62c0James Dong            if (r13&0xFF000700) /* need clipping */
4921cc31e629e8132df390ae692873c847d1c2f62c0James Dong            {
4931cc31e629e8132df390ae692873c847d1c2f62c0James Dong                /* move back to the beginning of the line */
4941cc31e629e8132df390ae692873c847d1c2f62c0James Dong                p_ref -= (ref_offset + blkwidth);   /* input */
4951cc31e629e8132df390ae692873c847d1c2f62c0James Dong                p_cur -= (outpitch >> 2);
4961cc31e629e8132df390ae692873c847d1c2f62c0James Dong
4971cc31e629e8132df390ae692873c847d1c2f62c0James Dong                tmp = (uint32)(p_ref + blkwidth);
4981cc31e629e8132df390ae692873c847d1c2f62c0James Dong                for (; (uint32)p_ref < tmp;)
4991cc31e629e8132df390ae692873c847d1c2f62c0James Dong                {
5001cc31e629e8132df390ae692873c847d1c2f62c0James Dong
5011cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    r0 = *p_ref++;
5021cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    r1 = *p_ref++;
5031cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    r2 = *p_ref++;
5041cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    r3 = *p_ref++;
5051cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    r4 = *p_ref++;
5061cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    /* first pixel */
5071cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    r5 = *p_ref++;
5081cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    result = (r0 + r5);
5091cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    r0 = (r1 + r4);
5101cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    result -= (r0 * 5);//result -= r0;  result -= (r0<<2);
5111cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    r0 = (r2 + r3);
5121cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    result += (r0 * 20);//result += (r0<<4);    result += (r0<<2);
5131cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    result = (result + 16) >> 5;
5141cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    CLIP_RESULT(result)
5151cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    pkres  = result;
5161cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    /* second pixel */
5171cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    r0 = *p_ref++;
5181cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    result = (r1 + r0);
5191cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    r1 = (r2 + r5);
5201cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    result -= (r1 * 5);//result -= r1;  result -= (r1<<2);
5211cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    r1 = (r3 + r4);
5221cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    result += (r1 * 20);//result += (r1<<4);    result += (r1<<2);
5231cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    result = (result + 16) >> 5;
5241cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    CLIP_RESULT(result)
5251cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    pkres  |= (result << 8);
5261cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    /* third pixel */
5271cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    r1 = *p_ref++;
5281cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    result = (r2 + r1);
5291cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    r2 = (r3 + r0);
5301cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    result -= (r2 * 5);//result -= r2;  result -= (r2<<2);
5311cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    r2 = (r4 + r5);
5321cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    result += (r2 * 20);//result += (r2<<4);    result += (r2<<2);
5331cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    result = (result + 16) >> 5;
5341cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    CLIP_RESULT(result)
5351cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    pkres  |= (result << 16);
5361cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    /* fourth pixel */
5371cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    r2 = *p_ref++;
5381cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    result = (r3 + r2);
5391cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    r3 = (r4 + r1);
5401cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    result -= (r3 * 5);//result -= r3;  result -= (r3<<2);
5411cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    r3 = (r5 + r0);
5421cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    result += (r3 * 20);//result += (r3<<4);    result += (r3<<2);
5431cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    result = (result + 16) >> 5;
5441cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    CLIP_RESULT(result)
5451cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    pkres  |= (result << 24);
5461cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    *p_cur++ = pkres;   /* write 4 pixels */
5471cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    p_ref -= 5;
5481cc31e629e8132df390ae692873c847d1c2f62c0James Dong                }
5491cc31e629e8132df390ae692873c847d1c2f62c0James Dong                p_cur += curr_offset; /* move to the next line */
5501cc31e629e8132df390ae692873c847d1c2f62c0James Dong                p_ref += ref_offset;
5511cc31e629e8132df390ae692873c847d1c2f62c0James Dong            }
5521cc31e629e8132df390ae692873c847d1c2f62c0James Dong        }
5531cc31e629e8132df390ae692873c847d1c2f62c0James Dong    }
5541cc31e629e8132df390ae692873c847d1c2f62c0James Dong
5551cc31e629e8132df390ae692873c847d1c2f62c0James Dong    return ;
5561cc31e629e8132df390ae692873c847d1c2f62c0James Dong}
5571cc31e629e8132df390ae692873c847d1c2f62c0James Dong
5581cc31e629e8132df390ae692873c847d1c2f62c0James Dongvoid eHorzInterp2MC(int *in, int inpitch, uint8 *out, int outpitch,
5591cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    int blkwidth, int blkheight, int dx)
5601cc31e629e8132df390ae692873c847d1c2f62c0James Dong{
5611cc31e629e8132df390ae692873c847d1c2f62c0James Dong    int *p_ref;
5621cc31e629e8132df390ae692873c847d1c2f62c0James Dong    uint32 *p_cur;
5631cc31e629e8132df390ae692873c847d1c2f62c0James Dong    uint32 tmp, pkres;
5641cc31e629e8132df390ae692873c847d1c2f62c0James Dong    int result, result2, curr_offset, ref_offset;
5651cc31e629e8132df390ae692873c847d1c2f62c0James Dong    int j, r0, r1, r2, r3, r4, r5;
5661cc31e629e8132df390ae692873c847d1c2f62c0James Dong
5671cc31e629e8132df390ae692873c847d1c2f62c0James Dong    p_cur = (uint32*)out; /* assume it's word aligned */
5681cc31e629e8132df390ae692873c847d1c2f62c0James Dong    curr_offset = (outpitch - blkwidth) >> 2;
5691cc31e629e8132df390ae692873c847d1c2f62c0James Dong    p_ref = in;
5701cc31e629e8132df390ae692873c847d1c2f62c0James Dong    ref_offset = inpitch - blkwidth;
5711cc31e629e8132df390ae692873c847d1c2f62c0James Dong
5721cc31e629e8132df390ae692873c847d1c2f62c0James Dong    if (dx&1)
5731cc31e629e8132df390ae692873c847d1c2f62c0James Dong    {
5741cc31e629e8132df390ae692873c847d1c2f62c0James Dong        dx = ((dx >> 1) ? -3 : -4); /* use in 3/4 pel */
5751cc31e629e8132df390ae692873c847d1c2f62c0James Dong
5761cc31e629e8132df390ae692873c847d1c2f62c0James Dong        for (j = blkheight; j > 0 ; j--)
5771cc31e629e8132df390ae692873c847d1c2f62c0James Dong        {
5781cc31e629e8132df390ae692873c847d1c2f62c0James Dong            tmp = (uint32)(p_ref + blkwidth);
5791cc31e629e8132df390ae692873c847d1c2f62c0James Dong            for (; (uint32)p_ref < tmp;)
5801cc31e629e8132df390ae692873c847d1c2f62c0James Dong            {
5811cc31e629e8132df390ae692873c847d1c2f62c0James Dong
5821cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r0 = p_ref[-2];
5831cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r1 = p_ref[-1];
5841cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r2 = *p_ref++;
5851cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r3 = *p_ref++;
5861cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r4 = *p_ref++;
5871cc31e629e8132df390ae692873c847d1c2f62c0James Dong                /* first pixel */
5881cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r5 = *p_ref++;
5891cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result = (r0 + r5);
5901cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r0 = (r1 + r4);
5911cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result -= (r0 * 5);//result -= r0;  result -= (r0<<2);
5921cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r0 = (r2 + r3);
5931cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result += (r0 * 20);//result += (r0<<4);    result += (r0<<2);
5941cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result = (result + 512) >> 10;
5951cc31e629e8132df390ae692873c847d1c2f62c0James Dong                CLIP_RESULT(result)
5961cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result2 = ((p_ref[dx] + 16) >> 5);
5971cc31e629e8132df390ae692873c847d1c2f62c0James Dong                CLIP_RESULT(result2)
5981cc31e629e8132df390ae692873c847d1c2f62c0James Dong                /* 3/4 pel,  no need to clip */
5991cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result = (result + result2 + 1);
6001cc31e629e8132df390ae692873c847d1c2f62c0James Dong                pkres = (result >> 1);
6011cc31e629e8132df390ae692873c847d1c2f62c0James Dong                /* second pixel */
6021cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r0 = *p_ref++;
6031cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result = (r1 + r0);
6041cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r1 = (r2 + r5);
6051cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result -= (r1 * 5);//result -= r1;  result -= (r1<<2);
6061cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r1 = (r3 + r4);
6071cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result += (r1 * 20);//result += (r1<<4);    result += (r1<<2);
6081cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result = (result + 512) >> 10;
6091cc31e629e8132df390ae692873c847d1c2f62c0James Dong                CLIP_RESULT(result)
6101cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result2 = ((p_ref[dx] + 16) >> 5);
6111cc31e629e8132df390ae692873c847d1c2f62c0James Dong                CLIP_RESULT(result2)
6121cc31e629e8132df390ae692873c847d1c2f62c0James Dong                /* 3/4 pel,  no need to clip */
6131cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result = (result + result2 + 1);
6141cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result = (result >> 1);
6151cc31e629e8132df390ae692873c847d1c2f62c0James Dong                pkres  |= (result << 8);
6161cc31e629e8132df390ae692873c847d1c2f62c0James Dong                /* third pixel */
6171cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r1 = *p_ref++;
6181cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result = (r2 + r1);
6191cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r2 = (r3 + r0);
6201cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result -= (r2 * 5);//result -= r2;  result -= (r2<<2);
6211cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r2 = (r4 + r5);
6221cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result += (r2 * 20);//result += (r2<<4);    result += (r2<<2);
6231cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result = (result + 512) >> 10;
6241cc31e629e8132df390ae692873c847d1c2f62c0James Dong                CLIP_RESULT(result)
6251cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result2 = ((p_ref[dx] + 16) >> 5);
6261cc31e629e8132df390ae692873c847d1c2f62c0James Dong                CLIP_RESULT(result2)
6271cc31e629e8132df390ae692873c847d1c2f62c0James Dong                /* 3/4 pel,  no need to clip */
6281cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result = (result + result2 + 1);
6291cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result = (result >> 1);
6301cc31e629e8132df390ae692873c847d1c2f62c0James Dong                pkres  |= (result << 16);
6311cc31e629e8132df390ae692873c847d1c2f62c0James Dong                /* fourth pixel */
6321cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r2 = *p_ref++;
6331cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result = (r3 + r2);
6341cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r3 = (r4 + r1);
6351cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result -= (r3 * 5);//result -= r3;  result -= (r3<<2);
6361cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r3 = (r5 + r0);
6371cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result += (r3 * 20);//result += (r3<<4);    result += (r3<<2);
6381cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result = (result + 512) >> 10;
6391cc31e629e8132df390ae692873c847d1c2f62c0James Dong                CLIP_RESULT(result)
6401cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result2 = ((p_ref[dx] + 16) >> 5);
6411cc31e629e8132df390ae692873c847d1c2f62c0James Dong                CLIP_RESULT(result2)
6421cc31e629e8132df390ae692873c847d1c2f62c0James Dong                /* 3/4 pel,  no need to clip */
6431cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result = (result + result2 + 1);
6441cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result = (result >> 1);
6451cc31e629e8132df390ae692873c847d1c2f62c0James Dong                pkres  |= (result << 24);
6461cc31e629e8132df390ae692873c847d1c2f62c0James Dong                *p_cur++ = pkres; /* write 4 pixels */
6471cc31e629e8132df390ae692873c847d1c2f62c0James Dong                p_ref -= 3;  /* offset back to the middle of filter */
6481cc31e629e8132df390ae692873c847d1c2f62c0James Dong            }
6491cc31e629e8132df390ae692873c847d1c2f62c0James Dong            p_cur += curr_offset;  /* move to the next line */
6501cc31e629e8132df390ae692873c847d1c2f62c0James Dong            p_ref += ref_offset;    /* move to the next line */
6511cc31e629e8132df390ae692873c847d1c2f62c0James Dong        }
6521cc31e629e8132df390ae692873c847d1c2f62c0James Dong    }
6531cc31e629e8132df390ae692873c847d1c2f62c0James Dong    else
6541cc31e629e8132df390ae692873c847d1c2f62c0James Dong    {
6551cc31e629e8132df390ae692873c847d1c2f62c0James Dong        for (j = blkheight; j > 0 ; j--)
6561cc31e629e8132df390ae692873c847d1c2f62c0James Dong        {
6571cc31e629e8132df390ae692873c847d1c2f62c0James Dong            tmp = (uint32)(p_ref + blkwidth);
6581cc31e629e8132df390ae692873c847d1c2f62c0James Dong            for (; (uint32)p_ref < tmp;)
6591cc31e629e8132df390ae692873c847d1c2f62c0James Dong            {
6601cc31e629e8132df390ae692873c847d1c2f62c0James Dong
6611cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r0 = p_ref[-2];
6621cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r1 = p_ref[-1];
6631cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r2 = *p_ref++;
6641cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r3 = *p_ref++;
6651cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r4 = *p_ref++;
6661cc31e629e8132df390ae692873c847d1c2f62c0James Dong                /* first pixel */
6671cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r5 = *p_ref++;
6681cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result = (r0 + r5);
6691cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r0 = (r1 + r4);
6701cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result -= (r0 * 5);//result -= r0;  result -= (r0<<2);
6711cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r0 = (r2 + r3);
6721cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result += (r0 * 20);//result += (r0<<4);    result += (r0<<2);
6731cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result = (result + 512) >> 10;
6741cc31e629e8132df390ae692873c847d1c2f62c0James Dong                CLIP_RESULT(result)
6751cc31e629e8132df390ae692873c847d1c2f62c0James Dong                pkres  = result;
6761cc31e629e8132df390ae692873c847d1c2f62c0James Dong                /* second pixel */
6771cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r0 = *p_ref++;
6781cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result = (r1 + r0);
6791cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r1 = (r2 + r5);
6801cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result -= (r1 * 5);//result -= r1;  result -= (r1<<2);
6811cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r1 = (r3 + r4);
6821cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result += (r1 * 20);//result += (r1<<4);    result += (r1<<2);
6831cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result = (result + 512) >> 10;
6841cc31e629e8132df390ae692873c847d1c2f62c0James Dong                CLIP_RESULT(result)
6851cc31e629e8132df390ae692873c847d1c2f62c0James Dong                pkres  |= (result << 8);
6861cc31e629e8132df390ae692873c847d1c2f62c0James Dong                /* third pixel */
6871cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r1 = *p_ref++;
6881cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result = (r2 + r1);
6891cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r2 = (r3 + r0);
6901cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result -= (r2 * 5);//result -= r2;  result -= (r2<<2);
6911cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r2 = (r4 + r5);
6921cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result += (r2 * 20);//result += (r2<<4);    result += (r2<<2);
6931cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result = (result + 512) >> 10;
6941cc31e629e8132df390ae692873c847d1c2f62c0James Dong                CLIP_RESULT(result)
6951cc31e629e8132df390ae692873c847d1c2f62c0James Dong                pkres  |= (result << 16);
6961cc31e629e8132df390ae692873c847d1c2f62c0James Dong                /* fourth pixel */
6971cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r2 = *p_ref++;
6981cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result = (r3 + r2);
6991cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r3 = (r4 + r1);
7001cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result -= (r3 * 5);//result -= r3;  result -= (r3<<2);
7011cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r3 = (r5 + r0);
7021cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result += (r3 * 20);//result += (r3<<4);    result += (r3<<2);
7031cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result = (result + 512) >> 10;
7041cc31e629e8132df390ae692873c847d1c2f62c0James Dong                CLIP_RESULT(result)
7051cc31e629e8132df390ae692873c847d1c2f62c0James Dong                pkres  |= (result << 24);
7061cc31e629e8132df390ae692873c847d1c2f62c0James Dong                *p_cur++ = pkres; /* write 4 pixels */
7071cc31e629e8132df390ae692873c847d1c2f62c0James Dong                p_ref -= 3;  /* offset back to the middle of filter */
7081cc31e629e8132df390ae692873c847d1c2f62c0James Dong            }
7091cc31e629e8132df390ae692873c847d1c2f62c0James Dong            p_cur += curr_offset;  /* move to the next line */
7101cc31e629e8132df390ae692873c847d1c2f62c0James Dong            p_ref += ref_offset;    /* move to the next line */
7111cc31e629e8132df390ae692873c847d1c2f62c0James Dong        }
7121cc31e629e8132df390ae692873c847d1c2f62c0James Dong    }
7131cc31e629e8132df390ae692873c847d1c2f62c0James Dong
7141cc31e629e8132df390ae692873c847d1c2f62c0James Dong    return ;
7151cc31e629e8132df390ae692873c847d1c2f62c0James Dong}
7161cc31e629e8132df390ae692873c847d1c2f62c0James Dong
7171cc31e629e8132df390ae692873c847d1c2f62c0James Dongvoid eHorzInterp3MC(uint8 *in, int inpitch, int *out, int outpitch,
7181cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    int blkwidth, int blkheight)
7191cc31e629e8132df390ae692873c847d1c2f62c0James Dong{
7201cc31e629e8132df390ae692873c847d1c2f62c0James Dong    uint8 *p_ref;
7211cc31e629e8132df390ae692873c847d1c2f62c0James Dong    int   *p_cur;
7221cc31e629e8132df390ae692873c847d1c2f62c0James Dong    uint32 tmp;
7231cc31e629e8132df390ae692873c847d1c2f62c0James Dong    int result, curr_offset, ref_offset;
7241cc31e629e8132df390ae692873c847d1c2f62c0James Dong    int j, r0, r1, r2, r3, r4, r5;
7251cc31e629e8132df390ae692873c847d1c2f62c0James Dong
7261cc31e629e8132df390ae692873c847d1c2f62c0James Dong    p_cur = out;
7271cc31e629e8132df390ae692873c847d1c2f62c0James Dong    curr_offset = (outpitch - blkwidth);
7281cc31e629e8132df390ae692873c847d1c2f62c0James Dong    p_ref = in;
7291cc31e629e8132df390ae692873c847d1c2f62c0James Dong    ref_offset = inpitch - blkwidth;
7301cc31e629e8132df390ae692873c847d1c2f62c0James Dong
7311cc31e629e8132df390ae692873c847d1c2f62c0James Dong    for (j = blkheight; j > 0 ; j--)
7321cc31e629e8132df390ae692873c847d1c2f62c0James Dong    {
7331cc31e629e8132df390ae692873c847d1c2f62c0James Dong        tmp = (uint32)(p_ref + blkwidth);
7341cc31e629e8132df390ae692873c847d1c2f62c0James Dong        for (; (uint32)p_ref < tmp;)
7351cc31e629e8132df390ae692873c847d1c2f62c0James Dong        {
7361cc31e629e8132df390ae692873c847d1c2f62c0James Dong
7371cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r0 = p_ref[-2];
7381cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r1 = p_ref[-1];
7391cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r2 = *p_ref++;
7401cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r3 = *p_ref++;
7411cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r4 = *p_ref++;
7421cc31e629e8132df390ae692873c847d1c2f62c0James Dong            /* first pixel */
7431cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r5 = *p_ref++;
7441cc31e629e8132df390ae692873c847d1c2f62c0James Dong            result = (r0 + r5);
7451cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r0 = (r1 + r4);
7461cc31e629e8132df390ae692873c847d1c2f62c0James Dong            result -= (r0 * 5);//result -= r0;  result -= (r0<<2);
7471cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r0 = (r2 + r3);
7481cc31e629e8132df390ae692873c847d1c2f62c0James Dong            result += (r0 * 20);//result += (r0<<4);    result += (r0<<2);
7491cc31e629e8132df390ae692873c847d1c2f62c0James Dong            *p_cur++ = result;
7501cc31e629e8132df390ae692873c847d1c2f62c0James Dong            /* second pixel */
7511cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r0 = *p_ref++;
7521cc31e629e8132df390ae692873c847d1c2f62c0James Dong            result = (r1 + r0);
7531cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r1 = (r2 + r5);
7541cc31e629e8132df390ae692873c847d1c2f62c0James Dong            result -= (r1 * 5);//result -= r1;  result -= (r1<<2);
7551cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r1 = (r3 + r4);
7561cc31e629e8132df390ae692873c847d1c2f62c0James Dong            result += (r1 * 20);//result += (r1<<4);    result += (r1<<2);
7571cc31e629e8132df390ae692873c847d1c2f62c0James Dong            *p_cur++ = result;
7581cc31e629e8132df390ae692873c847d1c2f62c0James Dong            /* third pixel */
7591cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r1 = *p_ref++;
7601cc31e629e8132df390ae692873c847d1c2f62c0James Dong            result = (r2 + r1);
7611cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r2 = (r3 + r0);
7621cc31e629e8132df390ae692873c847d1c2f62c0James Dong            result -= (r2 * 5);//result -= r2;  result -= (r2<<2);
7631cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r2 = (r4 + r5);
7641cc31e629e8132df390ae692873c847d1c2f62c0James Dong            result += (r2 * 20);//result += (r2<<4);    result += (r2<<2);
7651cc31e629e8132df390ae692873c847d1c2f62c0James Dong            *p_cur++ = result;
7661cc31e629e8132df390ae692873c847d1c2f62c0James Dong            /* fourth pixel */
7671cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r2 = *p_ref++;
7681cc31e629e8132df390ae692873c847d1c2f62c0James Dong            result = (r3 + r2);
7691cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r3 = (r4 + r1);
7701cc31e629e8132df390ae692873c847d1c2f62c0James Dong            result -= (r3 * 5);//result -= r3;  result -= (r3<<2);
7711cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r3 = (r5 + r0);
7721cc31e629e8132df390ae692873c847d1c2f62c0James Dong            result += (r3 * 20);//result += (r3<<4);    result += (r3<<2);
7731cc31e629e8132df390ae692873c847d1c2f62c0James Dong            *p_cur++ = result;
7741cc31e629e8132df390ae692873c847d1c2f62c0James Dong            p_ref -= 3; /* move back to the middle of the filter */
7751cc31e629e8132df390ae692873c847d1c2f62c0James Dong        }
7761cc31e629e8132df390ae692873c847d1c2f62c0James Dong        p_cur += curr_offset; /* move to the next line */
7771cc31e629e8132df390ae692873c847d1c2f62c0James Dong        p_ref += ref_offset;
7781cc31e629e8132df390ae692873c847d1c2f62c0James Dong    }
7791cc31e629e8132df390ae692873c847d1c2f62c0James Dong
7801cc31e629e8132df390ae692873c847d1c2f62c0James Dong    return ;
7811cc31e629e8132df390ae692873c847d1c2f62c0James Dong}
7821cc31e629e8132df390ae692873c847d1c2f62c0James Dongvoid eVertInterp1MC(uint8 *in, int inpitch, uint8 *out, int outpitch,
7831cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    int blkwidth, int blkheight, int dy)
7841cc31e629e8132df390ae692873c847d1c2f62c0James Dong{
7851cc31e629e8132df390ae692873c847d1c2f62c0James Dong    uint8 *p_cur, *p_ref;
7861cc31e629e8132df390ae692873c847d1c2f62c0James Dong    uint32 tmp;
7871cc31e629e8132df390ae692873c847d1c2f62c0James Dong    int result, curr_offset, ref_offset;
7881cc31e629e8132df390ae692873c847d1c2f62c0James Dong    int j, i;
7891cc31e629e8132df390ae692873c847d1c2f62c0James Dong    int32 r0, r1, r2, r3, r4, r5, r6, r7, r8, r13;
7901cc31e629e8132df390ae692873c847d1c2f62c0James Dong    uint8  tmp_in[24][24];
7911cc31e629e8132df390ae692873c847d1c2f62c0James Dong
7921cc31e629e8132df390ae692873c847d1c2f62c0James Dong    /* not word-aligned */
7931cc31e629e8132df390ae692873c847d1c2f62c0James Dong    if (((uint32)in)&0x3)
7941cc31e629e8132df390ae692873c847d1c2f62c0James Dong    {
7951cc31e629e8132df390ae692873c847d1c2f62c0James Dong        eCreateAlign(in, inpitch, -2, &tmp_in[0][0], blkwidth, blkheight + 5);
7961cc31e629e8132df390ae692873c847d1c2f62c0James Dong        in = &tmp_in[2][0];
7971cc31e629e8132df390ae692873c847d1c2f62c0James Dong        inpitch = 24;
7981cc31e629e8132df390ae692873c847d1c2f62c0James Dong    }
7991cc31e629e8132df390ae692873c847d1c2f62c0James Dong    p_cur = out;
8001cc31e629e8132df390ae692873c847d1c2f62c0James Dong    curr_offset = 1 - outpitch * (blkheight - 1); /* offset vertically back up and one pixel to right */
8011cc31e629e8132df390ae692873c847d1c2f62c0James Dong    ref_offset = blkheight * inpitch; /* for limit */
8021cc31e629e8132df390ae692873c847d1c2f62c0James Dong
8031cc31e629e8132df390ae692873c847d1c2f62c0James Dong    curr_offset += 3;
8041cc31e629e8132df390ae692873c847d1c2f62c0James Dong
8051cc31e629e8132df390ae692873c847d1c2f62c0James Dong    if (dy&1)
8061cc31e629e8132df390ae692873c847d1c2f62c0James Dong    {
8071cc31e629e8132df390ae692873c847d1c2f62c0James Dong        dy = (dy >> 1) ? 0 : -inpitch;
8081cc31e629e8132df390ae692873c847d1c2f62c0James Dong
8091cc31e629e8132df390ae692873c847d1c2f62c0James Dong        for (j = 0; j < blkwidth; j += 4, in += 4)
8101cc31e629e8132df390ae692873c847d1c2f62c0James Dong        {
8111cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r13 = 0;
8121cc31e629e8132df390ae692873c847d1c2f62c0James Dong            p_ref = in;
8131cc31e629e8132df390ae692873c847d1c2f62c0James Dong            p_cur -= outpitch;  /* compensate for the first offset */
8141cc31e629e8132df390ae692873c847d1c2f62c0James Dong            tmp = (uint32)(p_ref + ref_offset); /* limit */
8151cc31e629e8132df390ae692873c847d1c2f62c0James Dong            while ((uint32)p_ref < tmp)  /* the loop un-rolled  */
8161cc31e629e8132df390ae692873c847d1c2f62c0James Dong            {
8171cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r0 = *((uint32*)(p_ref - (inpitch << 1))); /* load 4 bytes */
8181cc31e629e8132df390ae692873c847d1c2f62c0James Dong                p_ref += inpitch;
8191cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r6 = (r0 >> 8) & 0xFF00FF; /* second and fourth byte */
8201cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r0 &= 0xFF00FF;
8211cc31e629e8132df390ae692873c847d1c2f62c0James Dong
8221cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r1 = *((uint32*)(p_ref + (inpitch << 1)));  /* r1, r7, ref[3] */
8231cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r7 = (r1 >> 8) & 0xFF00FF;
8241cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r1 &= 0xFF00FF;
8251cc31e629e8132df390ae692873c847d1c2f62c0James Dong
8261cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r0 += r1;
8271cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r6 += r7;
8281cc31e629e8132df390ae692873c847d1c2f62c0James Dong
8291cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r2 = *((uint32*)p_ref); /* r2, r8, ref[1] */
8301cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r8 = (r2 >> 8) & 0xFF00FF;
8311cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r2 &= 0xFF00FF;
8321cc31e629e8132df390ae692873c847d1c2f62c0James Dong
8331cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r1 = *((uint32*)(p_ref - inpitch)); /* r1, r7, ref[0] */
8341cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r7 = (r1 >> 8) & 0xFF00FF;
8351cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r1 &= 0xFF00FF;
8361cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r1 += r2;
8371cc31e629e8132df390ae692873c847d1c2f62c0James Dong
8381cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r7 += r8;
8391cc31e629e8132df390ae692873c847d1c2f62c0James Dong
8401cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r0 += 20 * r1;
8411cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r6 += 20 * r7;
8421cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r0 += 0x100010;
8431cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r6 += 0x100010;
8441cc31e629e8132df390ae692873c847d1c2f62c0James Dong
8451cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r2 = *((uint32*)(p_ref - (inpitch << 1))); /* r2, r8, ref[-1] */
8461cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r8 = (r2 >> 8) & 0xFF00FF;
8471cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r2 &= 0xFF00FF;
8481cc31e629e8132df390ae692873c847d1c2f62c0James Dong
8491cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r1 = *((uint32*)(p_ref + inpitch)); /* r1, r7, ref[2] */
8501cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r7 = (r1 >> 8) & 0xFF00FF;
8511cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r1 &= 0xFF00FF;
8521cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r1 += r2;
8531cc31e629e8132df390ae692873c847d1c2f62c0James Dong
8541cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r7 += r8;
8551cc31e629e8132df390ae692873c847d1c2f62c0James Dong
8561cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r0 -= 5 * r1;
8571cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r6 -= 5 * r7;
8581cc31e629e8132df390ae692873c847d1c2f62c0James Dong
8591cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r0 >>= 5;
8601cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r6 >>= 5;
8611cc31e629e8132df390ae692873c847d1c2f62c0James Dong                /* clip */
8621cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r13 |= r6;
8631cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r13 |= r0;
8641cc31e629e8132df390ae692873c847d1c2f62c0James Dong                //CLIPPACK(r6,result)
8651cc31e629e8132df390ae692873c847d1c2f62c0James Dong
8661cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r1 = *((uint32*)(p_ref + dy));
8671cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r2 = (r1 >> 8) & 0xFF00FF;
8681cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r1 &= 0xFF00FF;
8691cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r0 += r1;
8701cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r6 += r2;
8711cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r0 += 0x10001;
8721cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r6 += 0x10001;
8731cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r0 = (r0 >> 1) & 0xFF00FF;
8741cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r6 = (r6 >> 1) & 0xFF00FF;
8751cc31e629e8132df390ae692873c847d1c2f62c0James Dong
8761cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r0 |= (r6 << 8);  /* pack it back */
8771cc31e629e8132df390ae692873c847d1c2f62c0James Dong                *((uint32*)(p_cur += outpitch)) = r0;
8781cc31e629e8132df390ae692873c847d1c2f62c0James Dong            }
8791cc31e629e8132df390ae692873c847d1c2f62c0James Dong            p_cur += curr_offset; /* offset to the next pixel */
8801cc31e629e8132df390ae692873c847d1c2f62c0James Dong            if (r13 & 0xFF000700) /* this column need clipping */
8811cc31e629e8132df390ae692873c847d1c2f62c0James Dong            {
8821cc31e629e8132df390ae692873c847d1c2f62c0James Dong                p_cur -= 4;
8831cc31e629e8132df390ae692873c847d1c2f62c0James Dong                for (i = 0; i < 4; i++)
8841cc31e629e8132df390ae692873c847d1c2f62c0James Dong                {
8851cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    p_ref = in + i;
8861cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    p_cur -= outpitch;  /* compensate for the first offset */
8871cc31e629e8132df390ae692873c847d1c2f62c0James Dong
8881cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    tmp = (uint32)(p_ref + ref_offset); /* limit */
8891cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    while ((uint32)p_ref < tmp)
8901cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    {                           /* loop un-rolled */
8911cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        r0 = *(p_ref - (inpitch << 1));
8921cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        r1 = *(p_ref - inpitch);
8931cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        r2 = *p_ref;
8941cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        r3 = *(p_ref += inpitch);  /* modify pointer before loading */
8951cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        r4 = *(p_ref += inpitch);
8961cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        /* first pixel */
8971cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        r5 = *(p_ref += inpitch);
8981cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        result = (r0 + r5);
8991cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        r0 = (r1 + r4);
9001cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        result -= (r0 * 5);//result -= r0;  result -= (r0<<2);
9011cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        r0 = (r2 + r3);
9021cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        result += (r0 * 20);//result += (r0<<4);    result += (r0<<2);
9031cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        result = (result + 16) >> 5;
9041cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        CLIP_RESULT(result)
9051cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        /* 3/4 pel,  no need to clip */
9061cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        result = (result + p_ref[dy-(inpitch<<1)] + 1);
9071cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        result = (result >> 1);
9081cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        *(p_cur += outpitch) = result;
9091cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        /* second pixel */
9101cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        r0 = *(p_ref += inpitch);
9111cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        result = (r1 + r0);
9121cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        r1 = (r2 + r5);
9131cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        result -= (r1 * 5);//result -= r1;  result -= (r1<<2);
9141cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        r1 = (r3 + r4);
9151cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        result += (r1 * 20);//result += (r1<<4);    result += (r1<<2);
9161cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        result = (result + 16) >> 5;
9171cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        CLIP_RESULT(result)
9181cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        /* 3/4 pel,  no need to clip */
9191cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        result = (result + p_ref[dy-(inpitch<<1)] + 1);
9201cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        result = (result >> 1);
9211cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        *(p_cur += outpitch) = result;
9221cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        /* third pixel */
9231cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        r1 = *(p_ref += inpitch);
9241cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        result = (r2 + r1);
9251cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        r2 = (r3 + r0);
9261cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        result -= (r2 * 5);//result -= r2;  result -= (r2<<2);
9271cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        r2 = (r4 + r5);
9281cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        result += (r2 * 20);//result += (r2<<4);    result += (r2<<2);
9291cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        result = (result + 16) >> 5;
9301cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        CLIP_RESULT(result)
9311cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        /* 3/4 pel,  no need to clip */
9321cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        result = (result + p_ref[dy-(inpitch<<1)] + 1);
9331cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        result = (result >> 1);
9341cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        *(p_cur += outpitch) = result;
9351cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        /* fourth pixel */
9361cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        r2 = *(p_ref += inpitch);
9371cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        result = (r3 + r2);
9381cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        r3 = (r4 + r1);
9391cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        result -= (r3 * 5);//result -= r3;  result -= (r3<<2);
9401cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        r3 = (r5 + r0);
9411cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        result += (r3 * 20);//result += (r3<<4);    result += (r3<<2);
9421cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        result = (result + 16) >> 5;
9431cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        CLIP_RESULT(result)
9441cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        /* 3/4 pel,  no need to clip */
9451cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        result = (result + p_ref[dy-(inpitch<<1)] + 1);
9461cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        result = (result >> 1);
9471cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        *(p_cur += outpitch) = result;
9481cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        p_ref -= (inpitch << 1);  /* move back to center of the filter of the next one */
9491cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    }
9501cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    p_cur += (curr_offset - 3);
9511cc31e629e8132df390ae692873c847d1c2f62c0James Dong                }
9521cc31e629e8132df390ae692873c847d1c2f62c0James Dong            }
9531cc31e629e8132df390ae692873c847d1c2f62c0James Dong        }
9541cc31e629e8132df390ae692873c847d1c2f62c0James Dong    }
9551cc31e629e8132df390ae692873c847d1c2f62c0James Dong    else
9561cc31e629e8132df390ae692873c847d1c2f62c0James Dong    {
9571cc31e629e8132df390ae692873c847d1c2f62c0James Dong        for (j = 0; j < blkwidth; j += 4, in += 4)
9581cc31e629e8132df390ae692873c847d1c2f62c0James Dong        {
9591cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r13 = 0;
9601cc31e629e8132df390ae692873c847d1c2f62c0James Dong            p_ref = in;
9611cc31e629e8132df390ae692873c847d1c2f62c0James Dong            p_cur -= outpitch;  /* compensate for the first offset */
9621cc31e629e8132df390ae692873c847d1c2f62c0James Dong            tmp = (uint32)(p_ref + ref_offset); /* limit */
9631cc31e629e8132df390ae692873c847d1c2f62c0James Dong            while ((uint32)p_ref < tmp)  /* the loop un-rolled  */
9641cc31e629e8132df390ae692873c847d1c2f62c0James Dong            {
9651cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r0 = *((uint32*)(p_ref - (inpitch << 1))); /* load 4 bytes */
9661cc31e629e8132df390ae692873c847d1c2f62c0James Dong                p_ref += inpitch;
9671cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r6 = (r0 >> 8) & 0xFF00FF; /* second and fourth byte */
9681cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r0 &= 0xFF00FF;
9691cc31e629e8132df390ae692873c847d1c2f62c0James Dong
9701cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r1 = *((uint32*)(p_ref + (inpitch << 1)));  /* r1, r7, ref[3] */
9711cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r7 = (r1 >> 8) & 0xFF00FF;
9721cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r1 &= 0xFF00FF;
9731cc31e629e8132df390ae692873c847d1c2f62c0James Dong
9741cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r0 += r1;
9751cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r6 += r7;
9761cc31e629e8132df390ae692873c847d1c2f62c0James Dong
9771cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r2 = *((uint32*)p_ref); /* r2, r8, ref[1] */
9781cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r8 = (r2 >> 8) & 0xFF00FF;
9791cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r2 &= 0xFF00FF;
9801cc31e629e8132df390ae692873c847d1c2f62c0James Dong
9811cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r1 = *((uint32*)(p_ref - inpitch)); /* r1, r7, ref[0] */
9821cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r7 = (r1 >> 8) & 0xFF00FF;
9831cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r1 &= 0xFF00FF;
9841cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r1 += r2;
9851cc31e629e8132df390ae692873c847d1c2f62c0James Dong
9861cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r7 += r8;
9871cc31e629e8132df390ae692873c847d1c2f62c0James Dong
9881cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r0 += 20 * r1;
9891cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r6 += 20 * r7;
9901cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r0 += 0x100010;
9911cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r6 += 0x100010;
9921cc31e629e8132df390ae692873c847d1c2f62c0James Dong
9931cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r2 = *((uint32*)(p_ref - (inpitch << 1))); /* r2, r8, ref[-1] */
9941cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r8 = (r2 >> 8) & 0xFF00FF;
9951cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r2 &= 0xFF00FF;
9961cc31e629e8132df390ae692873c847d1c2f62c0James Dong
9971cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r1 = *((uint32*)(p_ref + inpitch)); /* r1, r7, ref[2] */
9981cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r7 = (r1 >> 8) & 0xFF00FF;
9991cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r1 &= 0xFF00FF;
10001cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r1 += r2;
10011cc31e629e8132df390ae692873c847d1c2f62c0James Dong
10021cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r7 += r8;
10031cc31e629e8132df390ae692873c847d1c2f62c0James Dong
10041cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r0 -= 5 * r1;
10051cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r6 -= 5 * r7;
10061cc31e629e8132df390ae692873c847d1c2f62c0James Dong
10071cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r0 >>= 5;
10081cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r6 >>= 5;
10091cc31e629e8132df390ae692873c847d1c2f62c0James Dong                /* clip */
10101cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r13 |= r6;
10111cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r13 |= r0;
10121cc31e629e8132df390ae692873c847d1c2f62c0James Dong                //CLIPPACK(r6,result)
10131cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r0 &= 0xFF00FF;
10141cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r6 &= 0xFF00FF;
10151cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r0 |= (r6 << 8);  /* pack it back */
10161cc31e629e8132df390ae692873c847d1c2f62c0James Dong                *((uint32*)(p_cur += outpitch)) = r0;
10171cc31e629e8132df390ae692873c847d1c2f62c0James Dong            }
10181cc31e629e8132df390ae692873c847d1c2f62c0James Dong            p_cur += curr_offset; /* offset to the next pixel */
10191cc31e629e8132df390ae692873c847d1c2f62c0James Dong            if (r13 & 0xFF000700) /* this column need clipping */
10201cc31e629e8132df390ae692873c847d1c2f62c0James Dong            {
10211cc31e629e8132df390ae692873c847d1c2f62c0James Dong                p_cur -= 4;
10221cc31e629e8132df390ae692873c847d1c2f62c0James Dong                for (i = 0; i < 4; i++)
10231cc31e629e8132df390ae692873c847d1c2f62c0James Dong                {
10241cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    p_ref = in + i;
10251cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    p_cur -= outpitch;  /* compensate for the first offset */
10261cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    tmp = (uint32)(p_ref + ref_offset); /* limit */
10271cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    while ((uint32)p_ref < tmp)
10281cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    {                           /* loop un-rolled */
10291cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        r0 = *(p_ref - (inpitch << 1));
10301cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        r1 = *(p_ref - inpitch);
10311cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        r2 = *p_ref;
10321cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        r3 = *(p_ref += inpitch);  /* modify pointer before loading */
10331cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        r4 = *(p_ref += inpitch);
10341cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        /* first pixel */
10351cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        r5 = *(p_ref += inpitch);
10361cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        result = (r0 + r5);
10371cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        r0 = (r1 + r4);
10381cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        result -= (r0 * 5);//result -= r0;  result -= (r0<<2);
10391cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        r0 = (r2 + r3);
10401cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        result += (r0 * 20);//result += (r0<<4);    result += (r0<<2);
10411cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        result = (result + 16) >> 5;
10421cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        CLIP_RESULT(result)
10431cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        *(p_cur += outpitch) = result;
10441cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        /* second pixel */
10451cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        r0 = *(p_ref += inpitch);
10461cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        result = (r1 + r0);
10471cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        r1 = (r2 + r5);
10481cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        result -= (r1 * 5);//result -= r1;  result -= (r1<<2);
10491cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        r1 = (r3 + r4);
10501cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        result += (r1 * 20);//result += (r1<<4);    result += (r1<<2);
10511cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        result = (result + 16) >> 5;
10521cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        CLIP_RESULT(result)
10531cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        *(p_cur += outpitch) = result;
10541cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        /* third pixel */
10551cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        r1 = *(p_ref += inpitch);
10561cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        result = (r2 + r1);
10571cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        r2 = (r3 + r0);
10581cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        result -= (r2 * 5);//result -= r2;  result -= (r2<<2);
10591cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        r2 = (r4 + r5);
10601cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        result += (r2 * 20);//result += (r2<<4);    result += (r2<<2);
10611cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        result = (result + 16) >> 5;
10621cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        CLIP_RESULT(result)
10631cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        *(p_cur += outpitch) = result;
10641cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        /* fourth pixel */
10651cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        r2 = *(p_ref += inpitch);
10661cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        result = (r3 + r2);
10671cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        r3 = (r4 + r1);
10681cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        result -= (r3 * 5);//result -= r3;  result -= (r3<<2);
10691cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        r3 = (r5 + r0);
10701cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        result += (r3 * 20);//result += (r3<<4);    result += (r3<<2);
10711cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        result = (result + 16) >> 5;
10721cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        CLIP_RESULT(result)
10731cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        *(p_cur += outpitch) = result;
10741cc31e629e8132df390ae692873c847d1c2f62c0James Dong                        p_ref -= (inpitch << 1);  /* move back to center of the filter of the next one */
10751cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    }
10761cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    p_cur += (curr_offset - 3);
10771cc31e629e8132df390ae692873c847d1c2f62c0James Dong                }
10781cc31e629e8132df390ae692873c847d1c2f62c0James Dong            }
10791cc31e629e8132df390ae692873c847d1c2f62c0James Dong        }
10801cc31e629e8132df390ae692873c847d1c2f62c0James Dong    }
10811cc31e629e8132df390ae692873c847d1c2f62c0James Dong
10821cc31e629e8132df390ae692873c847d1c2f62c0James Dong    return ;
10831cc31e629e8132df390ae692873c847d1c2f62c0James Dong}
10841cc31e629e8132df390ae692873c847d1c2f62c0James Dong
10851cc31e629e8132df390ae692873c847d1c2f62c0James Dongvoid eVertInterp2MC(uint8 *in, int inpitch, int *out, int outpitch,
10861cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    int blkwidth, int blkheight)
10871cc31e629e8132df390ae692873c847d1c2f62c0James Dong{
10881cc31e629e8132df390ae692873c847d1c2f62c0James Dong    int *p_cur;
10891cc31e629e8132df390ae692873c847d1c2f62c0James Dong    uint8 *p_ref;
10901cc31e629e8132df390ae692873c847d1c2f62c0James Dong    uint32 tmp;
10911cc31e629e8132df390ae692873c847d1c2f62c0James Dong    int result, curr_offset, ref_offset;
10921cc31e629e8132df390ae692873c847d1c2f62c0James Dong    int j, r0, r1, r2, r3, r4, r5;
10931cc31e629e8132df390ae692873c847d1c2f62c0James Dong
10941cc31e629e8132df390ae692873c847d1c2f62c0James Dong    p_cur = out;
10951cc31e629e8132df390ae692873c847d1c2f62c0James Dong    curr_offset = 1 - outpitch * (blkheight - 1); /* offset vertically back up and one pixel to right */
10961cc31e629e8132df390ae692873c847d1c2f62c0James Dong    ref_offset = blkheight * inpitch; /* for limit */
10971cc31e629e8132df390ae692873c847d1c2f62c0James Dong
10981cc31e629e8132df390ae692873c847d1c2f62c0James Dong    for (j = 0; j < blkwidth; j++)
10991cc31e629e8132df390ae692873c847d1c2f62c0James Dong    {
11001cc31e629e8132df390ae692873c847d1c2f62c0James Dong        p_cur -= outpitch; /* compensate for the first offset */
11011cc31e629e8132df390ae692873c847d1c2f62c0James Dong        p_ref = in++;
11021cc31e629e8132df390ae692873c847d1c2f62c0James Dong
11031cc31e629e8132df390ae692873c847d1c2f62c0James Dong        tmp = (uint32)(p_ref + ref_offset); /* limit */
11041cc31e629e8132df390ae692873c847d1c2f62c0James Dong        while ((uint32)p_ref < tmp)
11051cc31e629e8132df390ae692873c847d1c2f62c0James Dong        {                           /* loop un-rolled */
11061cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r0 = *(p_ref - (inpitch << 1));
11071cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r1 = *(p_ref - inpitch);
11081cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r2 = *p_ref;
11091cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r3 = *(p_ref += inpitch);  /* modify pointer before loading */
11101cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r4 = *(p_ref += inpitch);
11111cc31e629e8132df390ae692873c847d1c2f62c0James Dong            /* first pixel */
11121cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r5 = *(p_ref += inpitch);
11131cc31e629e8132df390ae692873c847d1c2f62c0James Dong            result = (r0 + r5);
11141cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r0 = (r1 + r4);
11151cc31e629e8132df390ae692873c847d1c2f62c0James Dong            result -= (r0 * 5);//result -= r0;  result -= (r0<<2);
11161cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r0 = (r2 + r3);
11171cc31e629e8132df390ae692873c847d1c2f62c0James Dong            result += (r0 * 20);//result += (r0<<4);    result += (r0<<2);
11181cc31e629e8132df390ae692873c847d1c2f62c0James Dong            *(p_cur += outpitch) = result;
11191cc31e629e8132df390ae692873c847d1c2f62c0James Dong            /* second pixel */
11201cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r0 = *(p_ref += inpitch);
11211cc31e629e8132df390ae692873c847d1c2f62c0James Dong            result = (r1 + r0);
11221cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r1 = (r2 + r5);
11231cc31e629e8132df390ae692873c847d1c2f62c0James Dong            result -= (r1 * 5);//result -= r1;  result -= (r1<<2);
11241cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r1 = (r3 + r4);
11251cc31e629e8132df390ae692873c847d1c2f62c0James Dong            result += (r1 * 20);//result += (r1<<4);    result += (r1<<2);
11261cc31e629e8132df390ae692873c847d1c2f62c0James Dong            *(p_cur += outpitch) = result;
11271cc31e629e8132df390ae692873c847d1c2f62c0James Dong            /* third pixel */
11281cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r1 = *(p_ref += inpitch);
11291cc31e629e8132df390ae692873c847d1c2f62c0James Dong            result = (r2 + r1);
11301cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r2 = (r3 + r0);
11311cc31e629e8132df390ae692873c847d1c2f62c0James Dong            result -= (r2 * 5);//result -= r2;  result -= (r2<<2);
11321cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r2 = (r4 + r5);
11331cc31e629e8132df390ae692873c847d1c2f62c0James Dong            result += (r2 * 20);//result += (r2<<4);    result += (r2<<2);
11341cc31e629e8132df390ae692873c847d1c2f62c0James Dong            *(p_cur += outpitch) = result;
11351cc31e629e8132df390ae692873c847d1c2f62c0James Dong            /* fourth pixel */
11361cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r2 = *(p_ref += inpitch);
11371cc31e629e8132df390ae692873c847d1c2f62c0James Dong            result = (r3 + r2);
11381cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r3 = (r4 + r1);
11391cc31e629e8132df390ae692873c847d1c2f62c0James Dong            result -= (r3 * 5);//result -= r3;  result -= (r3<<2);
11401cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r3 = (r5 + r0);
11411cc31e629e8132df390ae692873c847d1c2f62c0James Dong            result += (r3 * 20);//result += (r3<<4);    result += (r3<<2);
11421cc31e629e8132df390ae692873c847d1c2f62c0James Dong            *(p_cur += outpitch) = result;
11431cc31e629e8132df390ae692873c847d1c2f62c0James Dong            p_ref -= (inpitch << 1);  /* move back to center of the filter of the next one */
11441cc31e629e8132df390ae692873c847d1c2f62c0James Dong        }
11451cc31e629e8132df390ae692873c847d1c2f62c0James Dong        p_cur += curr_offset;
11461cc31e629e8132df390ae692873c847d1c2f62c0James Dong    }
11471cc31e629e8132df390ae692873c847d1c2f62c0James Dong
11481cc31e629e8132df390ae692873c847d1c2f62c0James Dong    return ;
11491cc31e629e8132df390ae692873c847d1c2f62c0James Dong}
11501cc31e629e8132df390ae692873c847d1c2f62c0James Dong
11511cc31e629e8132df390ae692873c847d1c2f62c0James Dongvoid eVertInterp3MC(int *in, int inpitch, uint8 *out, int outpitch,
11521cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    int blkwidth, int blkheight, int dy)
11531cc31e629e8132df390ae692873c847d1c2f62c0James Dong{
11541cc31e629e8132df390ae692873c847d1c2f62c0James Dong    uint8 *p_cur;
11551cc31e629e8132df390ae692873c847d1c2f62c0James Dong    int *p_ref;
11561cc31e629e8132df390ae692873c847d1c2f62c0James Dong    uint32 tmp;
11571cc31e629e8132df390ae692873c847d1c2f62c0James Dong    int result, result2, curr_offset, ref_offset;
11581cc31e629e8132df390ae692873c847d1c2f62c0James Dong    int j, r0, r1, r2, r3, r4, r5;
11591cc31e629e8132df390ae692873c847d1c2f62c0James Dong
11601cc31e629e8132df390ae692873c847d1c2f62c0James Dong    p_cur = out;
11611cc31e629e8132df390ae692873c847d1c2f62c0James Dong    curr_offset = 1 - outpitch * (blkheight - 1); /* offset vertically back up and one pixel to right */
11621cc31e629e8132df390ae692873c847d1c2f62c0James Dong    ref_offset = blkheight * inpitch; /* for limit */
11631cc31e629e8132df390ae692873c847d1c2f62c0James Dong
11641cc31e629e8132df390ae692873c847d1c2f62c0James Dong    if (dy&1)
11651cc31e629e8132df390ae692873c847d1c2f62c0James Dong    {
11661cc31e629e8132df390ae692873c847d1c2f62c0James Dong        dy = (dy >> 1) ? -(inpitch << 1) : -(inpitch << 1) - inpitch;
11671cc31e629e8132df390ae692873c847d1c2f62c0James Dong
11681cc31e629e8132df390ae692873c847d1c2f62c0James Dong        for (j = 0; j < blkwidth; j++)
11691cc31e629e8132df390ae692873c847d1c2f62c0James Dong        {
11701cc31e629e8132df390ae692873c847d1c2f62c0James Dong            p_cur -= outpitch; /* compensate for the first offset */
11711cc31e629e8132df390ae692873c847d1c2f62c0James Dong            p_ref = in++;
11721cc31e629e8132df390ae692873c847d1c2f62c0James Dong
11731cc31e629e8132df390ae692873c847d1c2f62c0James Dong            tmp = (uint32)(p_ref + ref_offset); /* limit */
11741cc31e629e8132df390ae692873c847d1c2f62c0James Dong            while ((uint32)p_ref < tmp)
11751cc31e629e8132df390ae692873c847d1c2f62c0James Dong            {                           /* loop un-rolled */
11761cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r0 = *(p_ref - (inpitch << 1));
11771cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r1 = *(p_ref - inpitch);
11781cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r2 = *p_ref;
11791cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r3 = *(p_ref += inpitch);  /* modify pointer before loading */
11801cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r4 = *(p_ref += inpitch);
11811cc31e629e8132df390ae692873c847d1c2f62c0James Dong                /* first pixel */
11821cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r5 = *(p_ref += inpitch);
11831cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result = (r0 + r5);
11841cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r0 = (r1 + r4);
11851cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result -= (r0 * 5);//result -= r0;  result -= (r0<<2);
11861cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r0 = (r2 + r3);
11871cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result += (r0 * 20);//result += (r0<<4);    result += (r0<<2);
11881cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result = (result + 512) >> 10;
11891cc31e629e8132df390ae692873c847d1c2f62c0James Dong                CLIP_RESULT(result)
11901cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result2 = ((p_ref[dy] + 16) >> 5);
11911cc31e629e8132df390ae692873c847d1c2f62c0James Dong                CLIP_RESULT(result2)
11921cc31e629e8132df390ae692873c847d1c2f62c0James Dong                /* 3/4 pel,  no need to clip */
11931cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result = (result + result2 + 1);
11941cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result = (result >> 1);
11951cc31e629e8132df390ae692873c847d1c2f62c0James Dong                *(p_cur += outpitch) = result;
11961cc31e629e8132df390ae692873c847d1c2f62c0James Dong                /* second pixel */
11971cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r0 = *(p_ref += inpitch);
11981cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result = (r1 + r0);
11991cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r1 = (r2 + r5);
12001cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result -= (r1 * 5);//result -= r1;  result -= (r1<<2);
12011cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r1 = (r3 + r4);
12021cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result += (r1 * 20);//result += (r1<<4);    result += (r1<<2);
12031cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result = (result + 512) >> 10;
12041cc31e629e8132df390ae692873c847d1c2f62c0James Dong                CLIP_RESULT(result)
12051cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result2 = ((p_ref[dy] + 16) >> 5);
12061cc31e629e8132df390ae692873c847d1c2f62c0James Dong                CLIP_RESULT(result2)
12071cc31e629e8132df390ae692873c847d1c2f62c0James Dong                /* 3/4 pel,  no need to clip */
12081cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result = (result + result2 + 1);
12091cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result = (result >> 1);
12101cc31e629e8132df390ae692873c847d1c2f62c0James Dong                *(p_cur += outpitch) = result;
12111cc31e629e8132df390ae692873c847d1c2f62c0James Dong                /* third pixel */
12121cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r1 = *(p_ref += inpitch);
12131cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result = (r2 + r1);
12141cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r2 = (r3 + r0);
12151cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result -= (r2 * 5);//result -= r2;  result -= (r2<<2);
12161cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r2 = (r4 + r5);
12171cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result += (r2 * 20);//result += (r2<<4);    result += (r2<<2);
12181cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result = (result + 512) >> 10;
12191cc31e629e8132df390ae692873c847d1c2f62c0James Dong                CLIP_RESULT(result)
12201cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result2 = ((p_ref[dy] + 16) >> 5);
12211cc31e629e8132df390ae692873c847d1c2f62c0James Dong                CLIP_RESULT(result2)
12221cc31e629e8132df390ae692873c847d1c2f62c0James Dong                /* 3/4 pel,  no need to clip */
12231cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result = (result + result2 + 1);
12241cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result = (result >> 1);
12251cc31e629e8132df390ae692873c847d1c2f62c0James Dong                *(p_cur += outpitch) = result;
12261cc31e629e8132df390ae692873c847d1c2f62c0James Dong                /* fourth pixel */
12271cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r2 = *(p_ref += inpitch);
12281cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result = (r3 + r2);
12291cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r3 = (r4 + r1);
12301cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result -= (r3 * 5);//result -= r3;  result -= (r3<<2);
12311cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r3 = (r5 + r0);
12321cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result += (r3 * 20);//result += (r3<<4);    result += (r3<<2);
12331cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result = (result + 512) >> 10;
12341cc31e629e8132df390ae692873c847d1c2f62c0James Dong                CLIP_RESULT(result)
12351cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result2 = ((p_ref[dy] + 16) >> 5);
12361cc31e629e8132df390ae692873c847d1c2f62c0James Dong                CLIP_RESULT(result2)
12371cc31e629e8132df390ae692873c847d1c2f62c0James Dong                /* 3/4 pel,  no need to clip */
12381cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result = (result + result2 + 1);
12391cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result = (result >> 1);
12401cc31e629e8132df390ae692873c847d1c2f62c0James Dong                *(p_cur += outpitch) = result;
12411cc31e629e8132df390ae692873c847d1c2f62c0James Dong                p_ref -= (inpitch << 1);  /* move back to center of the filter of the next one */
12421cc31e629e8132df390ae692873c847d1c2f62c0James Dong            }
12431cc31e629e8132df390ae692873c847d1c2f62c0James Dong            p_cur += curr_offset;
12441cc31e629e8132df390ae692873c847d1c2f62c0James Dong        }
12451cc31e629e8132df390ae692873c847d1c2f62c0James Dong    }
12461cc31e629e8132df390ae692873c847d1c2f62c0James Dong    else
12471cc31e629e8132df390ae692873c847d1c2f62c0James Dong    {
12481cc31e629e8132df390ae692873c847d1c2f62c0James Dong        for (j = 0; j < blkwidth; j++)
12491cc31e629e8132df390ae692873c847d1c2f62c0James Dong        {
12501cc31e629e8132df390ae692873c847d1c2f62c0James Dong            p_cur -= outpitch; /* compensate for the first offset */
12511cc31e629e8132df390ae692873c847d1c2f62c0James Dong            p_ref = in++;
12521cc31e629e8132df390ae692873c847d1c2f62c0James Dong
12531cc31e629e8132df390ae692873c847d1c2f62c0James Dong            tmp = (uint32)(p_ref + ref_offset); /* limit */
12541cc31e629e8132df390ae692873c847d1c2f62c0James Dong            while ((uint32)p_ref < tmp)
12551cc31e629e8132df390ae692873c847d1c2f62c0James Dong            {                           /* loop un-rolled */
12561cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r0 = *(p_ref - (inpitch << 1));
12571cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r1 = *(p_ref - inpitch);
12581cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r2 = *p_ref;
12591cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r3 = *(p_ref += inpitch);  /* modify pointer before loading */
12601cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r4 = *(p_ref += inpitch);
12611cc31e629e8132df390ae692873c847d1c2f62c0James Dong                /* first pixel */
12621cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r5 = *(p_ref += inpitch);
12631cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result = (r0 + r5);
12641cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r0 = (r1 + r4);
12651cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result -= (r0 * 5);//result -= r0;  result -= (r0<<2);
12661cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r0 = (r2 + r3);
12671cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result += (r0 * 20);//result += (r0<<4);    result += (r0<<2);
12681cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result = (result + 512) >> 10;
12691cc31e629e8132df390ae692873c847d1c2f62c0James Dong                CLIP_RESULT(result)
12701cc31e629e8132df390ae692873c847d1c2f62c0James Dong                *(p_cur += outpitch) = result;
12711cc31e629e8132df390ae692873c847d1c2f62c0James Dong                /* second pixel */
12721cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r0 = *(p_ref += inpitch);
12731cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result = (r1 + r0);
12741cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r1 = (r2 + r5);
12751cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result -= (r1 * 5);//result -= r1;  result -= (r1<<2);
12761cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r1 = (r3 + r4);
12771cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result += (r1 * 20);//result += (r1<<4);    result += (r1<<2);
12781cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result = (result + 512) >> 10;
12791cc31e629e8132df390ae692873c847d1c2f62c0James Dong                CLIP_RESULT(result)
12801cc31e629e8132df390ae692873c847d1c2f62c0James Dong                *(p_cur += outpitch) = result;
12811cc31e629e8132df390ae692873c847d1c2f62c0James Dong                /* third pixel */
12821cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r1 = *(p_ref += inpitch);
12831cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result = (r2 + r1);
12841cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r2 = (r3 + r0);
12851cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result -= (r2 * 5);//result -= r2;  result -= (r2<<2);
12861cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r2 = (r4 + r5);
12871cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result += (r2 * 20);//result += (r2<<4);    result += (r2<<2);
12881cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result = (result + 512) >> 10;
12891cc31e629e8132df390ae692873c847d1c2f62c0James Dong                CLIP_RESULT(result)
12901cc31e629e8132df390ae692873c847d1c2f62c0James Dong                *(p_cur += outpitch) = result;
12911cc31e629e8132df390ae692873c847d1c2f62c0James Dong                /* fourth pixel */
12921cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r2 = *(p_ref += inpitch);
12931cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result = (r3 + r2);
12941cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r3 = (r4 + r1);
12951cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result -= (r3 * 5);//result -= r3;  result -= (r3<<2);
12961cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r3 = (r5 + r0);
12971cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result += (r3 * 20);//result += (r3<<4);    result += (r3<<2);
12981cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result = (result + 512) >> 10;
12991cc31e629e8132df390ae692873c847d1c2f62c0James Dong                CLIP_RESULT(result)
13001cc31e629e8132df390ae692873c847d1c2f62c0James Dong                *(p_cur += outpitch) = result;
13011cc31e629e8132df390ae692873c847d1c2f62c0James Dong                p_ref -= (inpitch << 1);  /* move back to center of the filter of the next one */
13021cc31e629e8132df390ae692873c847d1c2f62c0James Dong            }
13031cc31e629e8132df390ae692873c847d1c2f62c0James Dong            p_cur += curr_offset;
13041cc31e629e8132df390ae692873c847d1c2f62c0James Dong        }
13051cc31e629e8132df390ae692873c847d1c2f62c0James Dong    }
13061cc31e629e8132df390ae692873c847d1c2f62c0James Dong
13071cc31e629e8132df390ae692873c847d1c2f62c0James Dong    return ;
13081cc31e629e8132df390ae692873c847d1c2f62c0James Dong}
13091cc31e629e8132df390ae692873c847d1c2f62c0James Dong
13101cc31e629e8132df390ae692873c847d1c2f62c0James Dongvoid eDiagonalInterpMC(uint8 *in1, uint8 *in2, int inpitch,
13111cc31e629e8132df390ae692873c847d1c2f62c0James Dong                       uint8 *out, int outpitch,
13121cc31e629e8132df390ae692873c847d1c2f62c0James Dong                       int blkwidth, int blkheight)
13131cc31e629e8132df390ae692873c847d1c2f62c0James Dong{
13141cc31e629e8132df390ae692873c847d1c2f62c0James Dong    int j, i;
13151cc31e629e8132df390ae692873c847d1c2f62c0James Dong    int result;
13161cc31e629e8132df390ae692873c847d1c2f62c0James Dong    uint8 *p_cur, *p_ref, *p_tmp8;
13171cc31e629e8132df390ae692873c847d1c2f62c0James Dong    int curr_offset, ref_offset;
13181cc31e629e8132df390ae692873c847d1c2f62c0James Dong    uint8 tmp_res[24][24], tmp_in[24][24];
13191cc31e629e8132df390ae692873c847d1c2f62c0James Dong    uint32 *p_tmp;
13201cc31e629e8132df390ae692873c847d1c2f62c0James Dong    uint32 tmp, pkres, tmp_result;
13211cc31e629e8132df390ae692873c847d1c2f62c0James Dong    int32 r0, r1, r2, r3, r4, r5;
13221cc31e629e8132df390ae692873c847d1c2f62c0James Dong    int32 r6, r7, r8, r9, r10, r13;
13231cc31e629e8132df390ae692873c847d1c2f62c0James Dong
13241cc31e629e8132df390ae692873c847d1c2f62c0James Dong    ref_offset = inpitch - blkwidth;
13251cc31e629e8132df390ae692873c847d1c2f62c0James Dong    p_ref = in1 - 2;
13261cc31e629e8132df390ae692873c847d1c2f62c0James Dong    /* perform horizontal interpolation */
13271cc31e629e8132df390ae692873c847d1c2f62c0James Dong    /* not word-aligned */
13281cc31e629e8132df390ae692873c847d1c2f62c0James Dong    /* It is faster to read 1 byte at time to avoid calling CreateAlign */
13291cc31e629e8132df390ae692873c847d1c2f62c0James Dong    /*  if(((uint32)p_ref)&0x3)
13301cc31e629e8132df390ae692873c847d1c2f62c0James Dong        {
13311cc31e629e8132df390ae692873c847d1c2f62c0James Dong            CreateAlign(p_ref,inpitch,0,&tmp_in[0][0],blkwidth+8,blkheight);
13321cc31e629e8132df390ae692873c847d1c2f62c0James Dong            p_ref = &tmp_in[0][0];
13331cc31e629e8132df390ae692873c847d1c2f62c0James Dong            ref_offset = 24-blkwidth;
13341cc31e629e8132df390ae692873c847d1c2f62c0James Dong        }*/
13351cc31e629e8132df390ae692873c847d1c2f62c0James Dong
13361cc31e629e8132df390ae692873c847d1c2f62c0James Dong    p_tmp = (uint32*) & (tmp_res[0][0]);
13371cc31e629e8132df390ae692873c847d1c2f62c0James Dong    for (j = blkheight; j > 0; j--)
13381cc31e629e8132df390ae692873c847d1c2f62c0James Dong    {
13391cc31e629e8132df390ae692873c847d1c2f62c0James Dong        r13 = 0;
13401cc31e629e8132df390ae692873c847d1c2f62c0James Dong        tmp = (uint32)(p_ref + blkwidth);
13411cc31e629e8132df390ae692873c847d1c2f62c0James Dong
13421cc31e629e8132df390ae692873c847d1c2f62c0James Dong        //r0 = *((uint32*)p_ref);   /* d,c,b,a */
13431cc31e629e8132df390ae692873c847d1c2f62c0James Dong        //r1 = (r0>>8)&0xFF00FF;    /* 0,d,0,b */
13441cc31e629e8132df390ae692873c847d1c2f62c0James Dong        //r0 &= 0xFF00FF;           /* 0,c,0,a */
13451cc31e629e8132df390ae692873c847d1c2f62c0James Dong        /* It is faster to read 1 byte at a time */
13461cc31e629e8132df390ae692873c847d1c2f62c0James Dong        r0 = p_ref[0];
13471cc31e629e8132df390ae692873c847d1c2f62c0James Dong        r1 = p_ref[2];
13481cc31e629e8132df390ae692873c847d1c2f62c0James Dong        r0 |= (r1 << 16);           /* 0,c,0,a */
13491cc31e629e8132df390ae692873c847d1c2f62c0James Dong        r1 = p_ref[1];
13501cc31e629e8132df390ae692873c847d1c2f62c0James Dong        r2 = p_ref[3];
13511cc31e629e8132df390ae692873c847d1c2f62c0James Dong        r1 |= (r2 << 16);           /* 0,d,0,b */
13521cc31e629e8132df390ae692873c847d1c2f62c0James Dong
13531cc31e629e8132df390ae692873c847d1c2f62c0James Dong        while ((uint32)p_ref < tmp)
13541cc31e629e8132df390ae692873c847d1c2f62c0James Dong        {
13551cc31e629e8132df390ae692873c847d1c2f62c0James Dong            //r2 = *((uint32*)(p_ref+=4));/* h,g,f,e */
13561cc31e629e8132df390ae692873c847d1c2f62c0James Dong            //r3 = (r2>>8)&0xFF00FF;  /* 0,h,0,f */
13571cc31e629e8132df390ae692873c847d1c2f62c0James Dong            //r2 &= 0xFF00FF;           /* 0,g,0,e */
13581cc31e629e8132df390ae692873c847d1c2f62c0James Dong            /* It is faster to read 1 byte at a time */
13591cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r2 = *(p_ref += 4);
13601cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r3 = p_ref[2];
13611cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r2 |= (r3 << 16);           /* 0,g,0,e */
13621cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r3 = p_ref[1];
13631cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r4 = p_ref[3];
13641cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r3 |= (r4 << 16);           /* 0,h,0,f */
13651cc31e629e8132df390ae692873c847d1c2f62c0James Dong
13661cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r4 = r0 + r3;       /* c+h, a+f */
13671cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r5 = r0 + r1;   /* c+d, a+b */
13681cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r6 = r2 + r3;   /* g+h, e+f */
13691cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r5 >>= 16;
13701cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r5 |= (r6 << 16);   /* e+f, c+d */
13711cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r4 += r5 * 20;      /* c+20*e+20*f+h, a+20*c+20*d+f */
13721cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r4 += 0x100010; /* +16, +16 */
13731cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r5 = r1 + r2;       /* d+g, b+e */
13741cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r4 -= r5 * 5;       /* c-5*d+20*e+20*f-5*g+h, a-5*b+20*c+20*d-5*e+f */
13751cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r4 >>= 5;
13761cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r13 |= r4;      /* check clipping */
13771cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r4 &= 0xFF00FF; /* mask */
13781cc31e629e8132df390ae692873c847d1c2f62c0James Dong
13791cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r5 = p_ref[4];  /* i */
13801cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r6 = (r5 << 16);
13811cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r5 = r6 | (r2 >> 16);/* 0,i,0,g */
13821cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r5 += r1;       /* d+i, b+g */ /* r5 not free */
13831cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r1 >>= 16;
13841cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r1 |= (r3 << 16); /* 0,f,0,d */ /* r1 has changed */
13851cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r1 += r2;       /* f+g, d+e */
13861cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r5 += 20 * r1;  /* d+20f+20g+i, b+20d+20e+g */
13871cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r0 >>= 16;
13881cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r0 |= (r2 << 16); /* 0,e,0,c */ /* r0 has changed */
13891cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r0 += r3;       /* e+h, c+f */
13901cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r5 += 0x100010; /* 16,16 */
13911cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r5 -= r0 * 5;       /* d-5e+20f+20g-5h+i, b-5c+20d+20e-5f+g */
13921cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r5 >>= 5;
13931cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r13 |= r5;      /* check clipping */
13941cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r5 &= 0xFF00FF; /* mask */
13951cc31e629e8132df390ae692873c847d1c2f62c0James Dong
13961cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r4 |= (r5 << 8);    /* pack them together */
13971cc31e629e8132df390ae692873c847d1c2f62c0James Dong            *p_tmp++ = r4;
13981cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r1 = r3;
13991cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r0 = r2;
14001cc31e629e8132df390ae692873c847d1c2f62c0James Dong        }
14011cc31e629e8132df390ae692873c847d1c2f62c0James Dong        p_tmp += ((24 - blkwidth) >> 2); /* move to the next line */
14021cc31e629e8132df390ae692873c847d1c2f62c0James Dong        p_ref += ref_offset;  /*    ref_offset = inpitch-blkwidth; */
14031cc31e629e8132df390ae692873c847d1c2f62c0James Dong
14041cc31e629e8132df390ae692873c847d1c2f62c0James Dong        if (r13&0xFF000700) /* need clipping */
14051cc31e629e8132df390ae692873c847d1c2f62c0James Dong        {
14061cc31e629e8132df390ae692873c847d1c2f62c0James Dong            /* move back to the beginning of the line */
14071cc31e629e8132df390ae692873c847d1c2f62c0James Dong            p_ref -= (ref_offset + blkwidth);   /* input */
14081cc31e629e8132df390ae692873c847d1c2f62c0James Dong            p_tmp -= 6; /* intermediate output */
14091cc31e629e8132df390ae692873c847d1c2f62c0James Dong            tmp = (uint32)(p_ref + blkwidth);
14101cc31e629e8132df390ae692873c847d1c2f62c0James Dong            while ((uint32)p_ref < tmp)
14111cc31e629e8132df390ae692873c847d1c2f62c0James Dong            {
14121cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r0 = *p_ref++;
14131cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r1 = *p_ref++;
14141cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r2 = *p_ref++;
14151cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r3 = *p_ref++;
14161cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r4 = *p_ref++;
14171cc31e629e8132df390ae692873c847d1c2f62c0James Dong                /* first pixel */
14181cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r5 = *p_ref++;
14191cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result = (r0 + r5);
14201cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r0 = (r1 + r4);
14211cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result -= (r0 * 5);//result -= r0;  result -= (r0<<2);
14221cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r0 = (r2 + r3);
14231cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result += (r0 * 20);//result += (r0<<4);    result += (r0<<2);
14241cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result = (result + 16) >> 5;
14251cc31e629e8132df390ae692873c847d1c2f62c0James Dong                CLIP_RESULT(result)
14261cc31e629e8132df390ae692873c847d1c2f62c0James Dong                pkres = result;
14271cc31e629e8132df390ae692873c847d1c2f62c0James Dong                /* second pixel */
14281cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r0 = *p_ref++;
14291cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result = (r1 + r0);
14301cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r1 = (r2 + r5);
14311cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result -= (r1 * 5);//result -= r1;  result -= (r1<<2);
14321cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r1 = (r3 + r4);
14331cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result += (r1 * 20);//result += (r1<<4);    result += (r1<<2);
14341cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result = (result + 16) >> 5;
14351cc31e629e8132df390ae692873c847d1c2f62c0James Dong                CLIP_RESULT(result)
14361cc31e629e8132df390ae692873c847d1c2f62c0James Dong                pkres |= (result << 8);
14371cc31e629e8132df390ae692873c847d1c2f62c0James Dong                /* third pixel */
14381cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r1 = *p_ref++;
14391cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result = (r2 + r1);
14401cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r2 = (r3 + r0);
14411cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result -= (r2 * 5);//result -= r2;  result -= (r2<<2);
14421cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r2 = (r4 + r5);
14431cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result += (r2 * 20);//result += (r2<<4);    result += (r2<<2);
14441cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result = (result + 16) >> 5;
14451cc31e629e8132df390ae692873c847d1c2f62c0James Dong                CLIP_RESULT(result)
14461cc31e629e8132df390ae692873c847d1c2f62c0James Dong                pkres |= (result << 16);
14471cc31e629e8132df390ae692873c847d1c2f62c0James Dong                /* fourth pixel */
14481cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r2 = *p_ref++;
14491cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result = (r3 + r2);
14501cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r3 = (r4 + r1);
14511cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result -= (r3 * 5);//result -= r3;  result -= (r3<<2);
14521cc31e629e8132df390ae692873c847d1c2f62c0James Dong                r3 = (r5 + r0);
14531cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result += (r3 * 20);//result += (r3<<4);    result += (r3<<2);
14541cc31e629e8132df390ae692873c847d1c2f62c0James Dong                result = (result + 16) >> 5;
14551cc31e629e8132df390ae692873c847d1c2f62c0James Dong                CLIP_RESULT(result)
14561cc31e629e8132df390ae692873c847d1c2f62c0James Dong                pkres |= (result << 24);
14571cc31e629e8132df390ae692873c847d1c2f62c0James Dong
14581cc31e629e8132df390ae692873c847d1c2f62c0James Dong                *p_tmp++ = pkres; /* write 4 pixel */
14591cc31e629e8132df390ae692873c847d1c2f62c0James Dong                p_ref -= 5;
14601cc31e629e8132df390ae692873c847d1c2f62c0James Dong            }
14611cc31e629e8132df390ae692873c847d1c2f62c0James Dong            p_tmp += ((24 - blkwidth) >> 2); /* move to the next line */
14621cc31e629e8132df390ae692873c847d1c2f62c0James Dong            p_ref += ref_offset;  /*    ref_offset = inpitch-blkwidth; */
14631cc31e629e8132df390ae692873c847d1c2f62c0James Dong        }
14641cc31e629e8132df390ae692873c847d1c2f62c0James Dong    }
14651cc31e629e8132df390ae692873c847d1c2f62c0James Dong
14661cc31e629e8132df390ae692873c847d1c2f62c0James Dong    /*  perform vertical interpolation */
14671cc31e629e8132df390ae692873c847d1c2f62c0James Dong    /* not word-aligned */
14681cc31e629e8132df390ae692873c847d1c2f62c0James Dong    if (((uint32)in2)&0x3)
14691cc31e629e8132df390ae692873c847d1c2f62c0James Dong    {
14701cc31e629e8132df390ae692873c847d1c2f62c0James Dong        eCreateAlign(in2, inpitch, -2, &tmp_in[0][0], blkwidth, blkheight + 5);
14711cc31e629e8132df390ae692873c847d1c2f62c0James Dong        in2 = &tmp_in[2][0];
14721cc31e629e8132df390ae692873c847d1c2f62c0James Dong        inpitch = 24;
14731cc31e629e8132df390ae692873c847d1c2f62c0James Dong    }
14741cc31e629e8132df390ae692873c847d1c2f62c0James Dong
14751cc31e629e8132df390ae692873c847d1c2f62c0James Dong    p_cur = out;
14761cc31e629e8132df390ae692873c847d1c2f62c0James Dong    curr_offset = 1 - outpitch * (blkheight - 1); /* offset vertically up and one pixel right */
14771cc31e629e8132df390ae692873c847d1c2f62c0James Dong    pkres = blkheight * inpitch; /* reuse it for limit */
14781cc31e629e8132df390ae692873c847d1c2f62c0James Dong
14791cc31e629e8132df390ae692873c847d1c2f62c0James Dong    curr_offset += 3;
14801cc31e629e8132df390ae692873c847d1c2f62c0James Dong
14811cc31e629e8132df390ae692873c847d1c2f62c0James Dong    for (j = 0; j < blkwidth; j += 4, in2 += 4)
14821cc31e629e8132df390ae692873c847d1c2f62c0James Dong    {
14831cc31e629e8132df390ae692873c847d1c2f62c0James Dong        r13 = 0;
14841cc31e629e8132df390ae692873c847d1c2f62c0James Dong        p_ref = in2;
14851cc31e629e8132df390ae692873c847d1c2f62c0James Dong        p_tmp8 = &(tmp_res[0][j]); /* intermediate result */
14861cc31e629e8132df390ae692873c847d1c2f62c0James Dong        p_tmp8 -= 24;  /* compensate for the first offset */
14871cc31e629e8132df390ae692873c847d1c2f62c0James Dong        p_cur -= outpitch;  /* compensate for the first offset */
14881cc31e629e8132df390ae692873c847d1c2f62c0James Dong        tmp = (uint32)(p_ref + pkres); /* limit */
14891cc31e629e8132df390ae692873c847d1c2f62c0James Dong        while ((uint32)p_ref < tmp)  /* the loop un-rolled  */
14901cc31e629e8132df390ae692873c847d1c2f62c0James Dong        {
14911cc31e629e8132df390ae692873c847d1c2f62c0James Dong            /* Read 1 byte at a time is too slow, too many read and pack ops, need to call CreateAlign */
14921cc31e629e8132df390ae692873c847d1c2f62c0James Dong            /*p_ref8 = p_ref-(inpitch<<1);          r0 = p_ref8[0];         r1 = p_ref8[2];
14931cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r0 |= (r1<<16);         r6 = p_ref8[1];         r1 = p_ref8[3];
14941cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r6 |= (r1<<16);         p_ref+=inpitch; */
14951cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r0 = *((uint32*)(p_ref - (inpitch << 1))); /* load 4 bytes */
14961cc31e629e8132df390ae692873c847d1c2f62c0James Dong            p_ref += inpitch;
14971cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r6 = (r0 >> 8) & 0xFF00FF; /* second and fourth byte */
14981cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r0 &= 0xFF00FF;
14991cc31e629e8132df390ae692873c847d1c2f62c0James Dong
15001cc31e629e8132df390ae692873c847d1c2f62c0James Dong            /*p_ref8 = p_ref+(inpitch<<1);
15011cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r1 = p_ref8[0];         r7 = p_ref8[2];         r1 |= (r7<<16);
15021cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r7 = p_ref8[1];         r2 = p_ref8[3];         r7 |= (r2<<16);*/
15031cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r1 = *((uint32*)(p_ref + (inpitch << 1)));  /* r1, r7, ref[3] */
15041cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r7 = (r1 >> 8) & 0xFF00FF;
15051cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r1 &= 0xFF00FF;
15061cc31e629e8132df390ae692873c847d1c2f62c0James Dong
15071cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r0 += r1;
15081cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r6 += r7;
15091cc31e629e8132df390ae692873c847d1c2f62c0James Dong
15101cc31e629e8132df390ae692873c847d1c2f62c0James Dong            /*r2 = p_ref[0];            r8 = p_ref[2];          r2 |= (r8<<16);
15111cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r8 = p_ref[1];          r1 = p_ref[3];          r8 |= (r1<<16);*/
15121cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r2 = *((uint32*)p_ref); /* r2, r8, ref[1] */
15131cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r8 = (r2 >> 8) & 0xFF00FF;
15141cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r2 &= 0xFF00FF;
15151cc31e629e8132df390ae692873c847d1c2f62c0James Dong
15161cc31e629e8132df390ae692873c847d1c2f62c0James Dong            /*p_ref8 = p_ref-inpitch;           r1 = p_ref8[0];         r7 = p_ref8[2];
15171cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r1 |= (r7<<16);         r1 += r2;           r7 = p_ref8[1];
15181cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r2 = p_ref8[3];         r7 |= (r2<<16);*/
15191cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r1 = *((uint32*)(p_ref - inpitch)); /* r1, r7, ref[0] */
15201cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r7 = (r1 >> 8) & 0xFF00FF;
15211cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r1 &= 0xFF00FF;
15221cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r1 += r2;
15231cc31e629e8132df390ae692873c847d1c2f62c0James Dong
15241cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r7 += r8;
15251cc31e629e8132df390ae692873c847d1c2f62c0James Dong
15261cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r0 += 20 * r1;
15271cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r6 += 20 * r7;
15281cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r0 += 0x100010;
15291cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r6 += 0x100010;
15301cc31e629e8132df390ae692873c847d1c2f62c0James Dong
15311cc31e629e8132df390ae692873c847d1c2f62c0James Dong            /*p_ref8 = p_ref-(inpitch<<1);          r2 = p_ref8[0];         r8 = p_ref8[2];
15321cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r2 |= (r8<<16);         r8 = p_ref8[1];         r1 = p_ref8[3];         r8 |= (r1<<16);*/
15331cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r2 = *((uint32*)(p_ref - (inpitch << 1))); /* r2, r8, ref[-1] */
15341cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r8 = (r2 >> 8) & 0xFF00FF;
15351cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r2 &= 0xFF00FF;
15361cc31e629e8132df390ae692873c847d1c2f62c0James Dong
15371cc31e629e8132df390ae692873c847d1c2f62c0James Dong            /*p_ref8 = p_ref+inpitch;           r1 = p_ref8[0];         r7 = p_ref8[2];
15381cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r1 |= (r7<<16);         r1 += r2;           r7 = p_ref8[1];
15391cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r2 = p_ref8[3];         r7 |= (r2<<16);*/
15401cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r1 = *((uint32*)(p_ref + inpitch)); /* r1, r7, ref[2] */
15411cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r7 = (r1 >> 8) & 0xFF00FF;
15421cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r1 &= 0xFF00FF;
15431cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r1 += r2;
15441cc31e629e8132df390ae692873c847d1c2f62c0James Dong
15451cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r7 += r8;
15461cc31e629e8132df390ae692873c847d1c2f62c0James Dong
15471cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r0 -= 5 * r1;
15481cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r6 -= 5 * r7;
15491cc31e629e8132df390ae692873c847d1c2f62c0James Dong
15501cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r0 >>= 5;
15511cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r6 >>= 5;
15521cc31e629e8132df390ae692873c847d1c2f62c0James Dong            /* clip */
15531cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r13 |= r6;
15541cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r13 |= r0;
15551cc31e629e8132df390ae692873c847d1c2f62c0James Dong            //CLIPPACK(r6,result)
15561cc31e629e8132df390ae692873c847d1c2f62c0James Dong            /* add with horizontal results */
15571cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r10 = *((uint32*)(p_tmp8 += 24));
15581cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r9 = (r10 >> 8) & 0xFF00FF;
15591cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r10 &= 0xFF00FF;
15601cc31e629e8132df390ae692873c847d1c2f62c0James Dong
15611cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r0 += r10;
15621cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r0 += 0x10001;
15631cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r0 = (r0 >> 1) & 0xFF00FF;   /* mask to 8 bytes */
15641cc31e629e8132df390ae692873c847d1c2f62c0James Dong
15651cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r6 += r9;
15661cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r6 += 0x10001;
15671cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r6 = (r6 >> 1) & 0xFF00FF;   /* mask to 8 bytes */
15681cc31e629e8132df390ae692873c847d1c2f62c0James Dong
15691cc31e629e8132df390ae692873c847d1c2f62c0James Dong            r0 |= (r6 << 8);  /* pack it back */
15701cc31e629e8132df390ae692873c847d1c2f62c0James Dong            *((uint32*)(p_cur += outpitch)) = r0;
15711cc31e629e8132df390ae692873c847d1c2f62c0James Dong        }
15721cc31e629e8132df390ae692873c847d1c2f62c0James Dong        p_cur += curr_offset; /* offset to the next pixel */
15731cc31e629e8132df390ae692873c847d1c2f62c0James Dong        if (r13 & 0xFF000700) /* this column need clipping */
15741cc31e629e8132df390ae692873c847d1c2f62c0James Dong        {
15751cc31e629e8132df390ae692873c847d1c2f62c0James Dong            p_cur -= 4;
15761cc31e629e8132df390ae692873c847d1c2f62c0James Dong            for (i = 0; i < 4; i++)
15771cc31e629e8132df390ae692873c847d1c2f62c0James Dong            {
15781cc31e629e8132df390ae692873c847d1c2f62c0James Dong                p_ref = in2 + i;
15791cc31e629e8132df390ae692873c847d1c2f62c0James Dong                p_tmp8 = &(tmp_res[0][j+i]); /* intermediate result */
15801cc31e629e8132df390ae692873c847d1c2f62c0James Dong                p_tmp8 -= 24;  /* compensate for the first offset */
15811cc31e629e8132df390ae692873c847d1c2f62c0James Dong                p_cur -= outpitch;  /* compensate for the first offset */
15821cc31e629e8132df390ae692873c847d1c2f62c0James Dong                tmp = (uint32)(p_ref + pkres); /* limit */
15831cc31e629e8132df390ae692873c847d1c2f62c0James Dong                while ((uint32)p_ref < tmp)  /* the loop un-rolled  */
15841cc31e629e8132df390ae692873c847d1c2f62c0James Dong                {
15851cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    r0 = *(p_ref - (inpitch << 1));
15861cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    r1 = *(p_ref - inpitch);
15871cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    r2 = *p_ref;
15881cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    r3 = *(p_ref += inpitch);  /* modify pointer before loading */
15891cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    r4 = *(p_ref += inpitch);
15901cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    /* first pixel */
15911cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    r5 = *(p_ref += inpitch);
15921cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    result = (r0 + r5);
15931cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    r0 = (r1 + r4);
15941cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    result -= (r0 * 5);//result -= r0;  result -= (r0<<2);
15951cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    r0 = (r2 + r3);
15961cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    result += (r0 * 20);//result += (r0<<4);    result += (r0<<2);
15971cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    result = (result + 16) >> 5;
15981cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    CLIP_RESULT(result)
15991cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    tmp_result = *(p_tmp8 += 24);  /* modify pointer before loading */
16001cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    result = (result + tmp_result + 1);  /* no clip */
16011cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    result = (result >> 1);
16021cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    *(p_cur += outpitch) = result;
16031cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    /* second pixel */
16041cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    r0 = *(p_ref += inpitch);
16051cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    result = (r1 + r0);
16061cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    r1 = (r2 + r5);
16071cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    result -= (r1 * 5);//result -= r1;  result -= (r1<<2);
16081cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    r1 = (r3 + r4);
16091cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    result += (r1 * 20);//result += (r1<<4);    result += (r1<<2);
16101cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    result = (result + 16) >> 5;
16111cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    CLIP_RESULT(result)
16121cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    tmp_result = *(p_tmp8 += 24);  /* intermediate result */
16131cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    result = (result + tmp_result + 1);  /* no clip */
16141cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    result = (result >> 1);
16151cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    *(p_cur += outpitch) = result;
16161cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    /* third pixel */
16171cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    r1 = *(p_ref += inpitch);
16181cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    result = (r2 + r1);
16191cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    r2 = (r3 + r0);
16201cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    result -= (r2 * 5);//result -= r2;  result -= (r2<<2);
16211cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    r2 = (r4 + r5);
16221cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    result += (r2 * 20);//result += (r2<<4);    result += (r2<<2);
16231cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    result = (result + 16) >> 5;
16241cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    CLIP_RESULT(result)
16251cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    tmp_result = *(p_tmp8 += 24);  /* intermediate result */
16261cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    result = (result + tmp_result + 1);  /* no clip */
16271cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    result = (result >> 1);
16281cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    *(p_cur += outpitch) = result;
16291cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    /* fourth pixel */
16301cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    r2 = *(p_ref += inpitch);
16311cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    result = (r3 + r2);
16321cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    r3 = (r4 + r1);
16331cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    result -= (r3 * 5);//result -= r3;  result -= (r3<<2);
16341cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    r3 = (r5 + r0);
16351cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    result += (r3 * 20);//result += (r3<<4);    result += (r3<<2);
16361cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    result = (result + 16) >> 5;
16371cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    CLIP_RESULT(result)
16381cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    tmp_result = *(p_tmp8 += 24);  /* intermediate result */
16391cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    result = (result + tmp_result + 1);  /* no clip */
16401cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    result = (result >> 1);
16411cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    *(p_cur += outpitch) = result;
16421cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    p_ref -= (inpitch << 1);  /* move back to center of the filter of the next one */
16431cc31e629e8132df390ae692873c847d1c2f62c0James Dong                }
16441cc31e629e8132df390ae692873c847d1c2f62c0James Dong                p_cur += (curr_offset - 3);
16451cc31e629e8132df390ae692873c847d1c2f62c0James Dong            }
16461cc31e629e8132df390ae692873c847d1c2f62c0James Dong        }
16471cc31e629e8132df390ae692873c847d1c2f62c0James Dong    }
16481cc31e629e8132df390ae692873c847d1c2f62c0James Dong
16491cc31e629e8132df390ae692873c847d1c2f62c0James Dong    return ;
16501cc31e629e8132df390ae692873c847d1c2f62c0James Dong}
16511cc31e629e8132df390ae692873c847d1c2f62c0James Dong
/* position G */
/*
 * eFullPelMC: copy a blkwidth x blkheight block of pixels from `in` to `out`
 * unchanged (no filtering is applied).
 * NOTE(review): "position G" presumably refers to the integer-sample position
 * in the H.264 fractional-interpolation notation -- confirm against the spec.
 *
 *   in        - first byte of the source block; successive rows are
 *               `inpitch` bytes apart.
 *   inpitch   - source row stride in bytes.
 *   out       - first byte of the destination block; successive rows are
 *               `outpitch` bytes apart.
 *   outpitch  - destination row stride in bytes.
 *   blkwidth  - block width in bytes. Each row is consumed in groups of four
 *               bytes, so a width that is not a multiple of 4 is rounded up
 *               to the next multiple of 4 (and the row pointers advance by
 *               that rounded amount), exactly as before.
 *               NOTE(review): callers presumably always pass a multiple of 4.
 *   blkheight - number of rows to copy.
 *
 * Bytes are moved individually instead of through uint32 loads/stores, so the
 * routine makes no assumption about the alignment of `in` or `out`, about the
 * width of a pointer, or about the byte order of the target, and it no longer
 * shifts a promoted uint8 into the sign bit of an int.
 */
void eFullPelMC(uint8 *in, int inpitch, uint8 *out, int outpitch,
                int blkwidth, int blkheight)
{
    /* distance from the end of one copied row to the start of the next */
    int offset_in = inpitch - blkwidth;
    int offset_out = outpitch - blkwidth;
    int i, j;
    uint8 b0, b1, b2, b3;

    for (j = blkheight; j > 0; j--)
    {
        for (i = blkwidth; i > 0; i -= 4)
        {
            /* read the whole 4-byte group before writing it, which keeps the
               same read-then-write granularity as a word copy */
            b0 = in[0];
            b1 = in[1];
            b2 = in[2];
            b3 = in[3];
            in += 4;

            out[0] = b0;
            out[1] = b1;
            out[2] = b2;
            out[3] = b3;
            out += 4;
        }
        out += offset_out;
        in += offset_in;
    }
}
17001cc31e629e8132df390ae692873c847d1c2f62c0James Dong
17011cc31e629e8132df390ae692873c847d1c2f62c0James Dongvoid ePadChroma(uint8 *ref, int picwidth, int picheight, int picpitch, int x_pos, int y_pos)
17021cc31e629e8132df390ae692873c847d1c2f62c0James Dong{
17031cc31e629e8132df390ae692873c847d1c2f62c0James Dong    int pad_height;
17041cc31e629e8132df390ae692873c847d1c2f62c0James Dong    int pad_width;
17051cc31e629e8132df390ae692873c847d1c2f62c0James Dong    uint8 *start;
17061cc31e629e8132df390ae692873c847d1c2f62c0James Dong    uint32 word1, word2, word3;
17071cc31e629e8132df390ae692873c847d1c2f62c0James Dong    int offset, j;
17081cc31e629e8132df390ae692873c847d1c2f62c0James Dong
17091cc31e629e8132df390ae692873c847d1c2f62c0James Dong
17101cc31e629e8132df390ae692873c847d1c2f62c0James Dong    pad_height = 8 + ((y_pos & 7) ? 1 : 0);
17111cc31e629e8132df390ae692873c847d1c2f62c0James Dong    pad_width = 8 + ((x_pos & 7) ? 1 : 0);
17121cc31e629e8132df390ae692873c847d1c2f62c0James Dong
17131cc31e629e8132df390ae692873c847d1c2f62c0James Dong    y_pos >>= 3;
17141cc31e629e8132df390ae692873c847d1c2f62c0James Dong    x_pos >>= 3;
17151cc31e629e8132df390ae692873c847d1c2f62c0James Dong    // pad vertical first
17161cc31e629e8132df390ae692873c847d1c2f62c0James Dong    if (y_pos < 0) // need to pad up
17171cc31e629e8132df390ae692873c847d1c2f62c0James Dong    {
17181cc31e629e8132df390ae692873c847d1c2f62c0James Dong        if (x_pos < -8) start = ref - 8;
17191cc31e629e8132df390ae692873c847d1c2f62c0James Dong        else if (x_pos + pad_width > picwidth + 7) start = ref + picwidth + 7 - pad_width;
17201cc31e629e8132df390ae692873c847d1c2f62c0James Dong        else start = ref + x_pos;
17211cc31e629e8132df390ae692873c847d1c2f62c0James Dong
17221cc31e629e8132df390ae692873c847d1c2f62c0James Dong        /* word-align start */
17231cc31e629e8132df390ae692873c847d1c2f62c0James Dong        offset = (uint32)start & 0x3;
17241cc31e629e8132df390ae692873c847d1c2f62c0James Dong        if (offset) start -= offset;
17251cc31e629e8132df390ae692873c847d1c2f62c0James Dong
17261cc31e629e8132df390ae692873c847d1c2f62c0James Dong        word1 = *((uint32*)start);
17271cc31e629e8132df390ae692873c847d1c2f62c0James Dong        word2 = *((uint32*)(start + 4));
17281cc31e629e8132df390ae692873c847d1c2f62c0James Dong        word3 = *((uint32*)(start + 8));
17291cc31e629e8132df390ae692873c847d1c2f62c0James Dong
17301cc31e629e8132df390ae692873c847d1c2f62c0James Dong        /* pad up N rows */
17311cc31e629e8132df390ae692873c847d1c2f62c0James Dong        j = -y_pos;
17321cc31e629e8132df390ae692873c847d1c2f62c0James Dong        if (j > 8) j = 8;
17331cc31e629e8132df390ae692873c847d1c2f62c0James Dong        while (j--)
17341cc31e629e8132df390ae692873c847d1c2f62c0James Dong        {
17351cc31e629e8132df390ae692873c847d1c2f62c0James Dong            *((uint32*)(start -= picpitch)) = word1;
17361cc31e629e8132df390ae692873c847d1c2f62c0James Dong            *((uint32*)(start + 4)) = word2;
17371cc31e629e8132df390ae692873c847d1c2f62c0James Dong            *((uint32*)(start + 8)) = word3;
17381cc31e629e8132df390ae692873c847d1c2f62c0James Dong        }
17391cc31e629e8132df390ae692873c847d1c2f62c0James Dong
17401cc31e629e8132df390ae692873c847d1c2f62c0James Dong    }
17411cc31e629e8132df390ae692873c847d1c2f62c0James Dong    else if (y_pos + pad_height >= picheight) /* pad down */
17421cc31e629e8132df390ae692873c847d1c2f62c0James Dong    {
17431cc31e629e8132df390ae692873c847d1c2f62c0James Dong        if (x_pos < -8) start = ref + picpitch * (picheight - 1) - 8;
17441cc31e629e8132df390ae692873c847d1c2f62c0James Dong        else if (x_pos + pad_width > picwidth + 7) start = ref + picpitch * (picheight - 1) +
17451cc31e629e8132df390ae692873c847d1c2f62c0James Dong                    picwidth + 7 - pad_width;
17461cc31e629e8132df390ae692873c847d1c2f62c0James Dong        else    start = ref + picpitch * (picheight - 1) + x_pos;
17471cc31e629e8132df390ae692873c847d1c2f62c0James Dong
17481cc31e629e8132df390ae692873c847d1c2f62c0James Dong        /* word-align start */
17491cc31e629e8132df390ae692873c847d1c2f62c0James Dong        offset = (uint32)start & 0x3;
17501cc31e629e8132df390ae692873c847d1c2f62c0James Dong        if (offset) start -= offset;
17511cc31e629e8132df390ae692873c847d1c2f62c0James Dong
17521cc31e629e8132df390ae692873c847d1c2f62c0James Dong        word1 = *((uint32*)start);
17531cc31e629e8132df390ae692873c847d1c2f62c0James Dong        word2 = *((uint32*)(start + 4));
17541cc31e629e8132df390ae692873c847d1c2f62c0James Dong        word3 = *((uint32*)(start + 8));
17551cc31e629e8132df390ae692873c847d1c2f62c0James Dong
17561cc31e629e8132df390ae692873c847d1c2f62c0James Dong        /* pad down N rows */
17571cc31e629e8132df390ae692873c847d1c2f62c0James Dong        j = y_pos + pad_height - picheight;
17581cc31e629e8132df390ae692873c847d1c2f62c0James Dong        if (j > 8) j = 8;
17591cc31e629e8132df390ae692873c847d1c2f62c0James Dong        while (j--)
17601cc31e629e8132df390ae692873c847d1c2f62c0James Dong        {
17611cc31e629e8132df390ae692873c847d1c2f62c0James Dong            *((uint32*)(start += picpitch)) = word1;
17621cc31e629e8132df390ae692873c847d1c2f62c0James Dong            *((uint32*)(start + 4)) = word2;
17631cc31e629e8132df390ae692873c847d1c2f62c0James Dong            *((uint32*)(start + 8)) = word3;
17641cc31e629e8132df390ae692873c847d1c2f62c0James Dong        }
17651cc31e629e8132df390ae692873c847d1c2f62c0James Dong    }
17661cc31e629e8132df390ae692873c847d1c2f62c0James Dong
17671cc31e629e8132df390ae692873c847d1c2f62c0James Dong    /* now pad horizontal */
17681cc31e629e8132df390ae692873c847d1c2f62c0James Dong    if (x_pos < 0) // pad left
17691cc31e629e8132df390ae692873c847d1c2f62c0James Dong    {
17701cc31e629e8132df390ae692873c847d1c2f62c0James Dong        if (y_pos < -8) start = ref - (picpitch << 3);
17711cc31e629e8132df390ae692873c847d1c2f62c0James Dong        else if (y_pos + pad_height > picheight + 7) start = ref + (picheight + 7 - pad_height) * picpitch;
17721cc31e629e8132df390ae692873c847d1c2f62c0James Dong        else start = ref + y_pos * picpitch;
17731cc31e629e8132df390ae692873c847d1c2f62c0James Dong
17741cc31e629e8132df390ae692873c847d1c2f62c0James Dong        // now pad left 8 pixels for pad_height rows */
17751cc31e629e8132df390ae692873c847d1c2f62c0James Dong        j = pad_height;
17761cc31e629e8132df390ae692873c847d1c2f62c0James Dong        start -= picpitch;
17771cc31e629e8132df390ae692873c847d1c2f62c0James Dong        while (j--)
17781cc31e629e8132df390ae692873c847d1c2f62c0James Dong        {
17791cc31e629e8132df390ae692873c847d1c2f62c0James Dong            word1 = *(start += picpitch);
17801cc31e629e8132df390ae692873c847d1c2f62c0James Dong            word1 |= (word1 << 8);
17811cc31e629e8132df390ae692873c847d1c2f62c0James Dong            word1 |= (word1 << 16);
17821cc31e629e8132df390ae692873c847d1c2f62c0James Dong            *((uint32*)(start - 8)) = word1;
17831cc31e629e8132df390ae692873c847d1c2f62c0James Dong            *((uint32*)(start - 4)) = word1;
17841cc31e629e8132df390ae692873c847d1c2f62c0James Dong        }
17851cc31e629e8132df390ae692873c847d1c2f62c0James Dong    }
17861cc31e629e8132df390ae692873c847d1c2f62c0James Dong    else if (x_pos + pad_width >= picwidth) /* pad right */
17871cc31e629e8132df390ae692873c847d1c2f62c0James Dong    {
17881cc31e629e8132df390ae692873c847d1c2f62c0James Dong        if (y_pos < -8) start = ref - (picpitch << 3) + picwidth - 1;
17891cc31e629e8132df390ae692873c847d1c2f62c0James Dong        else if (y_pos + pad_height > picheight + 7) start = ref + (picheight + 7 - pad_height) * picpitch + picwidth - 1;
17901cc31e629e8132df390ae692873c847d1c2f62c0James Dong        else start = ref + y_pos * picpitch + picwidth - 1;
17911cc31e629e8132df390ae692873c847d1c2f62c0James Dong
17921cc31e629e8132df390ae692873c847d1c2f62c0James Dong        // now pad right 8 pixels for pad_height rows */
17931cc31e629e8132df390ae692873c847d1c2f62c0James Dong        j = pad_height;
17941cc31e629e8132df390ae692873c847d1c2f62c0James Dong        start -= picpitch;
17951cc31e629e8132df390ae692873c847d1c2f62c0James Dong        while (j--)
17961cc31e629e8132df390ae692873c847d1c2f62c0James Dong        {
17971cc31e629e8132df390ae692873c847d1c2f62c0James Dong            word1 = *(start += picpitch);
17981cc31e629e8132df390ae692873c847d1c2f62c0James Dong            word1 |= (word1 << 8);
17991cc31e629e8132df390ae692873c847d1c2f62c0James Dong            word1 |= (word1 << 16);
18001cc31e629e8132df390ae692873c847d1c2f62c0James Dong            *((uint32*)(start + 1)) = word1;
18011cc31e629e8132df390ae692873c847d1c2f62c0James Dong            *((uint32*)(start + 5)) = word1;
18021cc31e629e8132df390ae692873c847d1c2f62c0James Dong        }
18031cc31e629e8132df390ae692873c847d1c2f62c0James Dong    }
18041cc31e629e8132df390ae692873c847d1c2f62c0James Dong
18051cc31e629e8132df390ae692873c847d1c2f62c0James Dong    return ;
18061cc31e629e8132df390ae692873c847d1c2f62c0James Dong}
18071cc31e629e8132df390ae692873c847d1c2f62c0James Dong
18081cc31e629e8132df390ae692873c847d1c2f62c0James Dong
18091cc31e629e8132df390ae692873c847d1c2f62c0James Dongvoid eChromaMotionComp(uint8 *ref, int picwidth, int picheight,
18101cc31e629e8132df390ae692873c847d1c2f62c0James Dong                       int x_pos, int y_pos,
18111cc31e629e8132df390ae692873c847d1c2f62c0James Dong                       uint8 *pred, int picpitch,
18121cc31e629e8132df390ae692873c847d1c2f62c0James Dong                       int blkwidth, int blkheight)
18131cc31e629e8132df390ae692873c847d1c2f62c0James Dong{
18141cc31e629e8132df390ae692873c847d1c2f62c0James Dong    int dx, dy;
18151cc31e629e8132df390ae692873c847d1c2f62c0James Dong    int offset_dx, offset_dy;
18161cc31e629e8132df390ae692873c847d1c2f62c0James Dong    int index;
18171cc31e629e8132df390ae692873c847d1c2f62c0James Dong
18181cc31e629e8132df390ae692873c847d1c2f62c0James Dong    ePadChroma(ref, picwidth, picheight, picpitch, x_pos, y_pos);
18191cc31e629e8132df390ae692873c847d1c2f62c0James Dong
18201cc31e629e8132df390ae692873c847d1c2f62c0James Dong    dx = x_pos & 7;
18211cc31e629e8132df390ae692873c847d1c2f62c0James Dong    dy = y_pos & 7;
18221cc31e629e8132df390ae692873c847d1c2f62c0James Dong    offset_dx = (dx + 7) >> 3;
18231cc31e629e8132df390ae692873c847d1c2f62c0James Dong    offset_dy = (dy + 7) >> 3;
18241cc31e629e8132df390ae692873c847d1c2f62c0James Dong    x_pos = x_pos >> 3;  /* round it to full-pel resolution */
18251cc31e629e8132df390ae692873c847d1c2f62c0James Dong    y_pos = y_pos >> 3;
18261cc31e629e8132df390ae692873c847d1c2f62c0James Dong
18271cc31e629e8132df390ae692873c847d1c2f62c0James Dong    ref += y_pos * picpitch + x_pos;
18281cc31e629e8132df390ae692873c847d1c2f62c0James Dong
18291cc31e629e8132df390ae692873c847d1c2f62c0James Dong    index = offset_dx + (offset_dy << 1) + ((blkwidth << 1) & 0x7);
18301cc31e629e8132df390ae692873c847d1c2f62c0James Dong
18311cc31e629e8132df390ae692873c847d1c2f62c0James Dong    (*(eChromaMC_SIMD[index]))(ref, picpitch , dx, dy, pred, picpitch, blkwidth, blkheight);
18321cc31e629e8132df390ae692873c847d1c2f62c0James Dong    return ;
18331cc31e629e8132df390ae692873c847d1c2f62c0James Dong}
18341cc31e629e8132df390ae692873c847d1c2f62c0James Dong
18351cc31e629e8132df390ae692873c847d1c2f62c0James Dong
/* SIMD-style routines: two 16-bit lanes are packed into each 32-bit word.
 * Original note: unroll the loops in vertical direction, decreasing loops
 * (still to be done). */

/*
 * Chroma interpolation that applies both fractional offsets: a horizontal
 * pass kept at full precision, then a vertical pass with rounding
 * ((value + 32) >> 6).
 *
 * pRef       top-left source sample; rows 0..blkheight and columns
 *            0..blkwidth are read
 * srcPitch   source row stride in bytes
 * dx, dy     fractional offsets; the weights are (8 - dx, dx) and (8 - dy, dy),
 *            so values in 0..8 keep every 16-bit lane from overflowing
 * pOut       destination; written one byte at a time, so no alignment or
 *            byte-order requirement
 * predPitch  destination row stride in bytes
 * blkwidth   processed in groups of 4 samples; the scratch layout holds two
 *            groups per row, i.e. widths 4 and 8
 * blkheight  number of output rows; the scratch buffer holds blkheight + 1
 *            rows, so blkheight must not exceed 8
 */
void eChromaDiagonalMC_SIMD(uint8 *pRef, int srcPitch, int dx, int dy,
                            uint8 *pOut, int predPitch, int blkwidth, int blkheight)
{
    /* 9 rows of 8 words.  Within a row, word g holds the horizontally
     * filtered samples 4g (bits 0..15) and 4g+2 (bits 16..31); word 4+g holds
     * samples 4g+1 and 4g+3.  Declared as words so no byte buffer is ever
     * accessed through a wider pointer. */
    uint32 temp[72];
    uint32 *row;
    uint32 *lane;
    uint8 *src, *dst;
    uint32 carry, s4, s0s2, s1s3, s2s4;
    uint32 even, odd, even_next, odd_next, res_even, res_odd;
    int i, j;
    const uint32 wx_right = (uint32)dx;
    const uint32 wx_left = (uint32)(8 - dx);
    const uint32 wy_below = (uint32)dy;
    const uint32 wy_above = (uint32)(8 - dy);

    /* horizontal pass over blkheight + 1 source rows */
    row = temp;
    for (i = 0; i < blkheight + 1; i++)
    {
        src = pRef;
        lane = row;
        carry = src[0];
        for (j = 0; j < blkwidth; j += 4)
        {
            s4 = src[4];
            s0s2 = carry | ((uint32)src[2] << 16);
            s1s3 = src[1] | ((uint32)src[3] << 16);
            s2s4 = (s0s2 >> 16) | (s4 << 16);

            lane[0] = wx_left * s0s2 + wx_right * s1s3; /* outputs 0 and 2 */
            lane[4] = wx_left * s1s3 + wx_right * s2s4; /* outputs 1 and 3 */

            src += 4;
            lane++;
            carry = s4; /* sample 4 is sample 0 of the next group */
        }
        pRef += srcPitch;
        row += 8;
    }

    /* vertical pass, one group of 4 columns at a time */
    for (j = 0; j < blkwidth; j += 4)
    {
        row = temp + (j >> 2);
        even = row[0];
        odd = row[4];
        dst = pOut + j;
        for (i = 0; i < blkheight; i++)
        {
            row += 8;
            even_next = row[0];
            odd_next = row[4];

            /* After the shift the low byte of each 16-bit lane is the
             * rounded sample; higher bits of a lane are leftovers. */
            res_even = (wy_above * even + wy_below * even_next + 0x00200020) >> 6;
            res_odd = (wy_above * odd + wy_below * odd_next + 0x00200020) >> 6;

            dst[0] = (uint8)(res_even & 0xFF);
            dst[1] = (uint8)(res_odd & 0xFF);
            dst[2] = (uint8)((res_even >> 16) & 0xFF);
            dst[3] = (uint8)((res_odd >> 16) & 0xFF);

            even = even_next;
            odd = odd_next;
            dst += predPitch;
        }
    }
    return;
}
19281cc31e629e8132df390ae692873c847d1c2f62c0James Dong
/*
 * Horizontal-only chroma interpolation with rounding ((value + 4) >> 3),
 * processing 4 output samples per inner iteration with two 16-bit lanes
 * packed per 32-bit word.
 *
 * pRef       top-left source sample; columns 0..blkwidth of each row are read
 * srcPitch   source row stride in bytes
 * dx         horizontal fractional offset; weights are (8 - dx, dx), so values
 *            in 0..8 keep each 16-bit lane from overflowing
 * dy         unused
 * pOut       destination; written bytewise, so no alignment or byte-order
 *            requirement
 * predPitch  destination row stride in bytes
 * blkwidth   handled in groups of 4 samples
 * blkheight  number of rows
 */
void eChromaHorizontalMC_SIMD(uint8 *pRef, int srcPitch, int dx, int dy,
                              uint8 *pOut, int predPitch, int blkwidth, int blkheight)
{
    (void)(dy);

    uint8 *src, *dst;
    uint32 carry, s4, s0s2, s1s3, s2s4, res_even, res_odd;
    int i, j;
    const uint32 w_right = (uint32)dx;
    const uint32 w_left = (uint32)(8 - dx);

    for (i = 0; i < blkheight; i++)
    {
        src = pRef;
        dst = pOut;

        carry = src[0];
        for (j = 0; j < blkwidth; j += 4)
        {
            /* All five source samples are read before anything is stored. */
            s4 = src[4];
            s0s2 = carry | ((uint32)src[2] << 16);
            s1s3 = src[1] | ((uint32)src[3] << 16);
            s2s4 = (s0s2 >> 16) | (s4 << 16);

            /* Low byte of each 16-bit lane is the rounded sample. */
            res_even = (w_left * s0s2 + w_right * s1s3 + 0x00040004) >> 3;
            res_odd = (w_left * s1s3 + w_right * s2s4 + 0x00040004) >> 3;

            dst[0] = (uint8)(res_even & 0xFF);
            dst[1] = (uint8)(res_odd & 0xFF);
            dst[2] = (uint8)((res_even >> 16) & 0xFF);
            dst[3] = (uint8)((res_odd >> 16) & 0xFF);

            src += 4;
            dst += 4;
            carry = s4; /* sample 4 is sample 0 of the next group */
        }

        pRef += srcPitch;
        pOut += predPitch;
    }
    return;
}
19771cc31e629e8132df390ae692873c847d1c2f62c0James Dong
/*
 * Vertical-only chroma interpolation with rounding ((value + 4) >> 3),
 * processing one 4-sample-wide column group at a time with two 16-bit lanes
 * packed per 32-bit word.
 *
 * pRef       top-left source sample; rows 0..blkheight are read
 * srcPitch   source row stride in bytes
 * dx         unused
 * dy         vertical fractional offset; weights are (8 - dy, dy), so values
 *            in 0..8 keep each 16-bit lane from overflowing
 * pOut       destination; written bytewise, so no alignment or byte-order
 *            requirement
 * predPitch  destination row stride in bytes
 * blkwidth   handled in groups of 4 samples
 * blkheight  number of rows
 */
void eChromaVerticalMC_SIMD(uint8 *pRef, int srcPitch, int dx, int dy,
                            uint8 *pOut, int predPitch, int blkwidth, int blkheight)
{
    (void)(dx);

    uint8 *src, *dst;
    uint32 even, odd, even_next, odd_next, res_even, res_odd;
    int i, j;
    const uint32 w_below = (uint32)dy;
    const uint32 w_above = (uint32)(8 - dy);

    for (i = 0; i < blkwidth; i += 4)
    {
        src = pRef + i;
        dst = pOut + i;

        /* samples 0 and 2 share one word, samples 1 and 3 the other */
        even = src[0] | ((uint32)src[2] << 16);
        odd = src[1] | ((uint32)src[3] << 16);
        for (j = 0; j < blkheight; j++)
        {
            /* The row below is read before the current row is stored. */
            src += srcPitch;
            even_next = src[0] | ((uint32)src[2] << 16);
            odd_next = src[1] | ((uint32)src[3] << 16);

            /* Low byte of each 16-bit lane is the rounded sample. */
            res_even = (w_above * even + w_below * even_next + 0x00040004) >> 3;
            res_odd = (w_above * odd + w_below * odd_next + 0x00040004) >> 3;

            dst[0] = (uint8)(res_even & 0xFF);
            dst[1] = (uint8)(res_odd & 0xFF);
            dst[2] = (uint8)((res_even >> 16) & 0xFF);
            dst[3] = (uint8)((res_odd >> 16) & 0xFF);

            even = even_next;
            odd = odd_next;
            dst += predPitch;
        }
    }
    return;
}
20201cc31e629e8132df390ae692873c847d1c2f62c0James Dong
/*
 * Two-sample-wide chroma interpolation applying both fractional offsets:
 * a horizontal pass kept at full precision, then a vertical pass with
 * rounding ((value + 32) >> 6).
 *
 * pRef       top-left source sample; columns 0..2 of rows 0..blkheight are read
 * srcPitch   source row stride in bytes
 * dx, dy     fractional offsets; weights are (8 - dx, dx) and (8 - dy, dy),
 *            so values in 0..8 keep each 16-bit lane from overflowing
 * pOut       destination; two bytes per row, written bytewise, so no
 *            alignment or byte-order requirement
 * predPitch  destination row stride in bytes
 * blkwidth   unused (the width is fixed at 2)
 * blkheight  number of output rows; the scratch buffer holds blkheight + 1
 *            entries, so blkheight must not exceed 8
 */
void eChromaDiagonalMC2_SIMD(uint8 *pRef, int srcPitch, int dx, int dy,
                             uint8 *pOut,  int predPitch, int blkwidth, int blkheight)
{
    (void)(blkwidth);

    /* One word per source row: filtered sample 0 in bits 0..15 and
     * filtered sample 1 in bits 16..31. */
    uint32 temp[9];
    uint32 above, below, result;
    int i, left, mid, right;
    const uint32 w_below = (uint32)dy;
    const uint32 w_above = (uint32)(8 - dy);

    /* horizontal pass */
    for (i = 0; i < blkheight + 1; i++)
    {
        left = pRef[0];
        mid = pRef[1];
        right = pRef[2];
        temp[i] = (uint32)((left << 3) + dx * (mid - left))
                  | ((uint32)((mid << 3) + dx * (right - mid)) << 16);
        pRef += srcPitch;
    }

    /* vertical pass */
    above = temp[0];
    for (i = 0; i < blkheight; i++)
    {
        below = temp[i + 1];

        /* Low byte of each 16-bit lane is the rounded sample. */
        result = (w_above * above + w_below * below + 0x00200020) >> 6;
        pOut[0] = (uint8)(result & 0xFF);
        pOut[1] = (uint8)((result >> 16) & 0xFF);

        above = below;
        pOut += predPitch;
    }
    return;
}
20631cc31e629e8132df390ae692873c847d1c2f62c0James Dong
/*
 * Two-sample-wide, horizontal-only chroma interpolation with rounding
 * ((value + 4) >> 3).
 *
 * pRef       top-left source sample; columns 0..2 of each row are read
 * srcPitch   source row stride in bytes
 * dx         horizontal fractional offset; each result is
 *            ((8 - dx) * a + dx * b + 4) >> 3 for neighbouring samples a, b
 * dy         unused
 * pOut       destination; two bytes per row, written bytewise, so no
 *            alignment or byte-order requirement
 * predPitch  destination row stride in bytes
 * blkwidth   unused (the width is fixed at 2)
 * blkheight  number of rows
 */
void eChromaHorizontalMC2_SIMD(uint8 *pRef, int srcPitch, int dx, int dy,
                               uint8 *pOut, int predPitch, int blkwidth, int blkheight)
{
    (void)(dy);
    (void)(blkwidth);

    int i, left, mid, right;

    for (i = 0; i < blkheight; i++)
    {
        /* Read all three source samples before storing anything. */
        left = pRef[0];
        mid = pRef[1];
        right = pRef[2];

        pOut[0] = (uint8)(((left << 3) + dx * (mid - left) + 4) >> 3);
        pOut[1] = (uint8)(((mid << 3) + dx * (right - mid) + 4) >> 3);

        pRef += srcPitch;
        pOut += predPitch;
    }
    return;
}
/*
 * Two-sample-wide, vertical-only chroma interpolation with rounding
 * ((value + 4) >> 3).  The two samples of a row share one 32-bit word
 * (sample 0 in bits 0..15, sample 1 in bits 16..31).
 *
 * pRef       top-left source sample; columns 0..1 of rows 0..blkheight are read
 * srcPitch   source row stride in bytes
 * dx         unused
 * dy         vertical fractional offset; weights are (8 - dy, dy), so values
 *            in 0..8 keep each 16-bit lane from overflowing
 * pOut       destination; two bytes per row, written bytewise, so no
 *            alignment or byte-order requirement
 * predPitch  destination row stride in bytes
 * blkwidth   unused (the width is fixed at 2)
 * blkheight  number of rows
 */
void eChromaVerticalMC2_SIMD(uint8 *pRef, int srcPitch, int dx, int dy,
                             uint8 *pOut, int predPitch, int blkwidth, int blkheight)
{
    (void)(dx);
    (void)(blkwidth);

    uint32 above, below, result;
    int i;
    const uint32 w_below = (uint32)dy;
    const uint32 w_above = (uint32)(8 - dy);

    above = pRef[0] | ((uint32)pRef[1] << 16);
    pRef += srcPitch;
    for (i = 0; i < blkheight; i++)
    {
        /* The row below is read before the current row is stored. */
        below = pRef[0] | ((uint32)pRef[1] << 16);

        /* Low byte of each 16-bit lane is the rounded sample. */
        result = (w_above * above + w_below * below + 0x00040004) >> 3;
        pOut[0] = (uint8)(result & 0xFF);
        pOut[1] = (uint8)((result >> 16) & 0xFF);

        above = below;
        pRef += srcPitch;
        pOut += predPitch;
    }
    return;
}
21111cc31e629e8132df390ae692873c847d1c2f62c0James Dong
/*
 * Full-sample chroma prediction: a plain block copy from the reference to
 * the prediction buffer.
 *
 * Samples are copied as individual bytes, two per inner iteration, so the
 * routine needs no pointer-to-integer alignment test and has no alignment
 * or byte-order requirement on either buffer.
 *
 * pRef       top-left source sample
 * srcPitch   source row stride in bytes
 * dx, dy     unused
 * pOut       destination
 * predPitch  destination row stride in bytes
 * blkwidth   samples per row; copied in pairs, so it is expected to be even
 * blkheight  number of rows
 */
void eChromaFullMC_SIMD(uint8 *pRef, int srcPitch, int dx, int dy,
                        uint8 *pOut, int predPitch, int blkwidth, int blkheight)
{
    (void)(dx);
    (void)(dy);

    int row, col;

    for (row = blkheight; row > 0; row--)
    {
        for (col = 0; col < blkwidth; col += 2)
        {
            pOut[col] = pRef[col];
            pOut[col + 1] = pRef[col + 1];
        }
        pRef += srcPitch;
        pOut += predPitch;
    }
    return ;
}
2157