/* ------------------------------------------------------------------
 * Copyright (C) 1998-2009 PacketVideo
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 * -------------------------------------------------------------------
 */
#include "avcdec_lib.h"
194a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
204a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
/*
 * Clamp the integer lvalue x to the range [0, 0xFF] in place.
 *
 * When x, viewed as unsigned, is above 0xFF it is either a negative value or
 * a value greater than 255. (x) >> 31 is then all ones for a negative x
 * (this relies on a 32-bit int and an arithmetic right shift, which is
 * implementation-defined in C) and zero otherwise, so the masked complement
 * yields 0 for negatives and 0xFF for overflow.
 *
 * The macro expands to a complete `if { }` statement and evaluates x more
 * than once: pass a plain lvalue without side effects, and do not place it
 * directly in front of an `else`.
 * NOTE(review): kept as a bare `if` rather than do/while(0) because call
 * sites outside this view may invoke it without a trailing semicolon.
 */
#define CLIP_RESULT(x)      if ((uint)(x) > 0xFF) { \
                 (x) = 0xFF & (~((x) >> 31)); }
234a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
244a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber/* (blkwidth << 2) + (dy << 1) + dx */
254a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huberstatic void (*const ChromaMC_SIMD[8])(uint8 *, int , int , int , uint8 *, int, int , int) =
264a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber{
274a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    &ChromaFullMC_SIMD,
284a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    &ChromaHorizontalMC_SIMD,
294a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    &ChromaVerticalMC_SIMD,
304a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    &ChromaDiagonalMC_SIMD,
314a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    &ChromaFullMC_SIMD,
324a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    &ChromaHorizontalMC2_SIMD,
334a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    &ChromaVerticalMC2_SIMD,
344a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    &ChromaDiagonalMC2_SIMD
354a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber};
364a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber/* Perform motion prediction and compensation with residue if exist. */
374a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Hubervoid InterMBPrediction(AVCCommonObj *video)
384a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber{
394a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    AVCMacroblock *currMB = video->currMB;
404a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    AVCPictureData *currPic = video->currPic;
414a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    int mbPartIdx, subMbPartIdx;
424a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    int ref_idx;
434a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    int offset_MbPart_indx = 0;
444a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    int16 *mv;
454a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    uint32 x_pos, y_pos;
464a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    uint8 *curL, *curCb, *curCr;
474a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    uint8 *ref_l, *ref_Cb, *ref_Cr;
484a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    uint8 *predBlock, *predCb, *predCr;
494a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    int block_x, block_y, offset_x, offset_y, offsetP, offset;
504a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    int x_position = (video->mb_x << 4);
514a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    int y_position = (video->mb_y << 4);
524a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    int MbHeight, MbWidth, mbPartIdx_X, mbPartIdx_Y, offset_indx;
534a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    int picWidth = currPic->pitch;
544a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    int picHeight = currPic->height;
554a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    int16 *dataBlock;
564a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    uint32 cbp4x4;
574a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    uint32 tmp_word;
584a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
594a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    tmp_word = y_position * picWidth;
604a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    curL = currPic->Sl + tmp_word + x_position;
614a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    offset = (tmp_word >> 2) + (x_position >> 1);
624a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    curCb = currPic->Scb + offset;
634a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    curCr = currPic->Scr + offset;
644a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
654a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber#ifdef USE_PRED_BLOCK
664a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    predBlock = video->pred + 84;
674a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    predCb = video->pred + 452;
684a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    predCr = video->pred + 596;
694a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber#else
704a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    predBlock = curL;
714a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    predCb = curCb;
724a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    predCr = curCr;
734a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber#endif
744a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
754a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    GetMotionVectorPredictor(video, false);
764a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
774a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    for (mbPartIdx = 0; mbPartIdx < currMB->NumMbPart; mbPartIdx++)
784a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    {
794a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        MbHeight = currMB->SubMbPartHeight[mbPartIdx];
804a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        MbWidth = currMB->SubMbPartWidth[mbPartIdx];
814a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        mbPartIdx_X = ((mbPartIdx + offset_MbPart_indx) & 1);
824a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        mbPartIdx_Y = (mbPartIdx + offset_MbPart_indx) >> 1;
834a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        ref_idx = currMB->ref_idx_L0[(mbPartIdx_Y << 1) + mbPartIdx_X];
844a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        offset_indx = 0;
854a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
864a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        ref_l = video->RefPicList0[ref_idx]->Sl;
874a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        ref_Cb = video->RefPicList0[ref_idx]->Scb;
884a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        ref_Cr = video->RefPicList0[ref_idx]->Scr;
894a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
904a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        for (subMbPartIdx = 0; subMbPartIdx < currMB->NumSubMbPart[mbPartIdx]; subMbPartIdx++)
914a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        {
924a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            block_x = (mbPartIdx_X << 1) + ((subMbPartIdx + offset_indx) & 1);  // check this
934a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            block_y = (mbPartIdx_Y << 1) + (((subMbPartIdx + offset_indx) >> 1) & 1);
944a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            mv = (int16*)(currMB->mvL0 + block_x + (block_y << 2));
954a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            offset_x = x_position + (block_x << 2);
964a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            offset_y = y_position + (block_y << 2);
974a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            x_pos = (offset_x << 2) + *mv++;   /*quarter pel */
984a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            y_pos = (offset_y << 2) + *mv;   /*quarter pel */
994a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
1004a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            //offset = offset_y * currPic->width;
1014a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            //offsetC = (offset >> 2) + (offset_x >> 1);
1024a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber#ifdef USE_PRED_BLOCK
1034a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            offsetP = (block_y * 80) + (block_x << 2);
1044a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            LumaMotionComp(ref_l, picWidth, picHeight, x_pos, y_pos,
1054a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                           /*comp_Sl + offset + offset_x,*/
1064a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                           predBlock + offsetP, 20, MbWidth, MbHeight);
1074a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber#else
1084a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            offsetP = (block_y << 2) * picWidth + (block_x << 2);
1094a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            LumaMotionComp(ref_l, picWidth, picHeight, x_pos, y_pos,
1104a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                           /*comp_Sl + offset + offset_x,*/
1114a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                           predBlock + offsetP, picWidth, MbWidth, MbHeight);
1124a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber#endif
1134a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
1144a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber#ifdef USE_PRED_BLOCK
1154a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            offsetP = (block_y * 24) + (block_x << 1);
1164a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            ChromaMotionComp(ref_Cb, picWidth >> 1, picHeight >> 1, x_pos, y_pos,
1174a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                             /*comp_Scb +  offsetC,*/
1184a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                             predCb + offsetP, 12, MbWidth >> 1, MbHeight >> 1);
1194a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            ChromaMotionComp(ref_Cr, picWidth >> 1, picHeight >> 1, x_pos, y_pos,
1204a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                             /*comp_Scr +  offsetC,*/
1214a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                             predCr + offsetP, 12, MbWidth >> 1, MbHeight >> 1);
1224a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber#else
1234a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            offsetP = (block_y * picWidth) + (block_x << 1);
1244a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            ChromaMotionComp(ref_Cb, picWidth >> 1, picHeight >> 1, x_pos, y_pos,
1254a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                             /*comp_Scb +  offsetC,*/
1264a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                             predCb + offsetP, picWidth >> 1, MbWidth >> 1, MbHeight >> 1);
1274a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            ChromaMotionComp(ref_Cr, picWidth >> 1, picHeight >> 1, x_pos, y_pos,
1284a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                             /*comp_Scr +  offsetC,*/
1294a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                             predCr + offsetP, picWidth >> 1, MbWidth >> 1, MbHeight >> 1);
1304a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber#endif
1314a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
1324a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            offset_indx = currMB->SubMbPartWidth[mbPartIdx] >> 3;
1334a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        }
1344a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        offset_MbPart_indx = currMB->MbPartWidth >> 4;
1354a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    }
1364a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
1374a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    /* used in decoder, used to be if(!encFlag)  */
1384a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
1394a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    /* transform in raster scan order */
1404a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    dataBlock = video->block;
1414a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    cbp4x4 = video->cbp4x4;
1424a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    /* luma */
1434a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    for (block_y = 4; block_y > 0; block_y--)
1444a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    {
1454a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        for (block_x = 4; block_x > 0; block_x--)
1464a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        {
1474a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber#ifdef USE_PRED_BLOCK
1484a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            if (cbp4x4&1)
1494a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            {
1504a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                itrans(dataBlock, predBlock, predBlock, 20);
1514a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            }
1524a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber#else
1534a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            if (cbp4x4&1)
1544a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            {
1554a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                itrans(dataBlock, curL, curL, picWidth);
1564a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            }
1574a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber#endif
1584a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            cbp4x4 >>= 1;
1594a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            dataBlock += 4;
1604a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber#ifdef USE_PRED_BLOCK
1614a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            predBlock += 4;
1624a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber#else
1634a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            curL += 4;
1644a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber#endif
1654a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        }
1664a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        dataBlock += 48;
1674a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber#ifdef USE_PRED_BLOCK
1684a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        predBlock += 64;
1694a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber#else
1704a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        curL += ((picWidth << 2) - 16);
1714a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber#endif
1724a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    }
1734a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
1744a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    /* chroma */
1754a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    picWidth = (picWidth >> 1);
1764a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    for (block_y = 2; block_y > 0; block_y--)
1774a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    {
1784a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        for (block_x = 2; block_x > 0; block_x--)
1794a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        {
1804a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber#ifdef USE_PRED_BLOCK
1814a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            if (cbp4x4&1)
1824a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            {
1834a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                ictrans(dataBlock, predCb, predCb, 12);
1844a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            }
1854a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber#else
1864a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            if (cbp4x4&1)
1874a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            {
1884a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                ictrans(dataBlock, curCb, curCb, picWidth);
1894a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            }
1904a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber#endif
1914a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            cbp4x4 >>= 1;
1924a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            dataBlock += 4;
1934a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber#ifdef USE_PRED_BLOCK
1944a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            predCb += 4;
1954a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber#else
1964a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            curCb += 4;
1974a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber#endif
1984a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        }
1994a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        for (block_x = 2; block_x > 0; block_x--)
2004a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        {
2014a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber#ifdef USE_PRED_BLOCK
2024a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            if (cbp4x4&1)
2034a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            {
2044a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                ictrans(dataBlock, predCr, predCr, 12);
2054a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            }
2064a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber#else
2074a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            if (cbp4x4&1)
2084a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            {
2094a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                ictrans(dataBlock, curCr, curCr, picWidth);
2104a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            }
2114a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber#endif
2124a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            cbp4x4 >>= 1;
2134a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            dataBlock += 4;
2144a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber#ifdef USE_PRED_BLOCK
2154a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            predCr += 4;
2164a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber#else
2174a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            curCr += 4;
2184a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber#endif
2194a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        }
2204a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        dataBlock += 48;
2214a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber#ifdef USE_PRED_BLOCK
2224a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        predCb += 40;
2234a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        predCr += 40;
2244a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber#else
2254a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        curCb += ((picWidth << 2) - 8);
2264a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        curCr += ((picWidth << 2) - 8);
2274a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber#endif
2284a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    }
2294a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
2304a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber#ifdef MB_BASED_DEBLOCK
2314a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    SaveNeighborForIntraPred(video, offset);
2324a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber#endif
2334a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
2344a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    return ;
2354a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber}
2364a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
2374a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
2384a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber/* preform the actual  motion comp here */
2394a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Hubervoid LumaMotionComp(uint8 *ref, int picwidth, int picheight,
2404a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    int x_pos, int y_pos,
2414a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    uint8 *pred, int pred_pitch,
2424a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    int blkwidth, int blkheight)
2434a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber{
2444a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    int dx, dy;
2454a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    uint8 temp[24][24]; /* for padding, make the size multiple of 4 for packing */
2464a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    int temp2[21][21]; /* for intermediate results */
2474a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    uint8 *ref2;
2484a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
2494a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    dx = x_pos & 3;
2504a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    dy = y_pos & 3;
2514a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    x_pos = x_pos >> 2;  /* round it to full-pel resolution */
2524a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    y_pos = y_pos >> 2;
2534a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
2544a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    /* perform actual motion compensation */
2554a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    if (dx == 0 && dy == 0)
2564a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    {  /* fullpel position *//* G */
2574a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        if (x_pos >= 0 && x_pos + blkwidth <= picwidth && y_pos >= 0 && y_pos + blkheight <= picheight)
2584a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        {
2594a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            ref += y_pos * picwidth + x_pos;
2604a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            FullPelMC(ref, picwidth, pred, pred_pitch, blkwidth, blkheight);
2614a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        }
2624a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        else
2634a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        {
2644a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            CreatePad(ref, picwidth, picheight, x_pos, y_pos, &temp[0][0], blkwidth, blkheight);
2654a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            FullPelMC(&temp[0][0], 24, pred, pred_pitch, blkwidth, blkheight);
2664a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        }
2674a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
2684a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    }   /* other positions */
2694a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    else  if (dy == 0)
2704a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    { /* no vertical interpolation *//* a,b,c*/
2714a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
2724a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        if (x_pos - 2 >= 0 && x_pos + 3 + blkwidth <= picwidth && y_pos >= 0 && y_pos + blkheight <= picheight)
2734a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        {
2744a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            ref += y_pos * picwidth + x_pos;
2754a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
2764a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            HorzInterp1MC(ref, picwidth, pred, pred_pitch, blkwidth, blkheight, dx);
2774a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        }
2784a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        else  /* need padding */
2794a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        {
2804a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            CreatePad(ref, picwidth, picheight, x_pos - 2, y_pos, &temp[0][0], blkwidth + 5, blkheight);
2814a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
2824a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            HorzInterp1MC(&temp[0][2], 24, pred, pred_pitch, blkwidth, blkheight, dx);
2834a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        }
2844a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    }
2854a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    else if (dx == 0)
2864a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    { /*no horizontal interpolation *//* d,h,n */
2874a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
2884a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        if (x_pos >= 0 && x_pos + blkwidth <= picwidth && y_pos - 2 >= 0 && y_pos + 3 + blkheight <= picheight)
2894a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        {
2904a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            ref += y_pos * picwidth + x_pos;
2914a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
2924a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            VertInterp1MC(ref, picwidth, pred, pred_pitch, blkwidth, blkheight, dy);
2934a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        }
2944a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        else  /* need padding */
2954a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        {
2964a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            CreatePad(ref, picwidth, picheight, x_pos, y_pos - 2, &temp[0][0], blkwidth, blkheight + 5);
2974a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
2984a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            VertInterp1MC(&temp[2][0], 24, pred, pred_pitch, blkwidth, blkheight, dy);
2994a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        }
3004a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    }
3014a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    else if (dy == 2)
3024a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    {  /* horizontal cross *//* i, j, k */
3034a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
3044a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        if (x_pos - 2 >= 0 && x_pos + 3 + blkwidth <= picwidth && y_pos - 2 >= 0 && y_pos + 3 + blkheight <= picheight)
3054a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        {
3064a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            ref += y_pos * picwidth + x_pos - 2; /* move to the left 2 pixels */
3074a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
3084a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            VertInterp2MC(ref, picwidth, &temp2[0][0], 21, blkwidth + 5, blkheight);
3094a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
3104a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            HorzInterp2MC(&temp2[0][2], 21, pred, pred_pitch, blkwidth, blkheight, dx);
3114a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        }
3124a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        else /* need padding */
3134a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        {
3144a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            CreatePad(ref, picwidth, picheight, x_pos - 2, y_pos - 2, &temp[0][0], blkwidth + 5, blkheight + 5);
3154a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
3164a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            VertInterp2MC(&temp[2][0], 24, &temp2[0][0], 21, blkwidth + 5, blkheight);
3174a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
3184a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            HorzInterp2MC(&temp2[0][2], 21, pred, pred_pitch, blkwidth, blkheight, dx);
3194a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        }
3204a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    }
3214a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    else if (dx == 2)
3224a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    { /* vertical cross */ /* f,q */
3234a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
3244a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        if (x_pos - 2 >= 0 && x_pos + 3 + blkwidth <= picwidth && y_pos - 2 >= 0 && y_pos + 3 + blkheight <= picheight)
3254a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        {
3264a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            ref += (y_pos - 2) * picwidth + x_pos; /* move to up 2 lines */
3274a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
3284a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            HorzInterp3MC(ref, picwidth, &temp2[0][0], 21, blkwidth, blkheight + 5);
3294a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            VertInterp3MC(&temp2[2][0], 21, pred, pred_pitch, blkwidth, blkheight, dy);
3304a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        }
3314a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        else  /* need padding */
3324a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        {
3334a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            CreatePad(ref, picwidth, picheight, x_pos - 2, y_pos - 2, &temp[0][0], blkwidth + 5, blkheight + 5);
3344a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            HorzInterp3MC(&temp[0][2], 24, &temp2[0][0], 21, blkwidth, blkheight + 5);
3354a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            VertInterp3MC(&temp2[2][0], 21, pred, pred_pitch, blkwidth, blkheight, dy);
3364a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        }
3374a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    }
3384a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    else
3394a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    { /* diagonal *//* e,g,p,r */
3404a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
3414a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        if (x_pos - 2 >= 0 && x_pos + 3 + (dx / 2) + blkwidth <= picwidth &&
3424a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                y_pos - 2 >= 0 && y_pos + 3 + blkheight + (dy / 2) <= picheight)
3434a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        {
3444a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            ref2 = ref + (y_pos + (dy / 2)) * picwidth + x_pos;
3454a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
3464a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            ref += (y_pos * picwidth) + x_pos + (dx / 2);
3474a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
3484a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            DiagonalInterpMC(ref2, ref, picwidth, pred, pred_pitch, blkwidth, blkheight);
3494a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        }
3504a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        else  /* need padding */
3514a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        {
3524a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            CreatePad(ref, picwidth, picheight, x_pos - 2, y_pos - 2, &temp[0][0], blkwidth + 5 + (dx / 2), blkheight + 5 + (dy / 2));
3534a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
3544a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            ref2 = &temp[2 + (dy/2)][2];
3554a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
3564a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            ref = &temp[2][2 + (dx/2)];
3574a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
3584a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            DiagonalInterpMC(ref2, ref, 24, pred, pred_pitch, blkwidth, blkheight);
3594a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        }
3604a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    }
3614a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
3624a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    return ;
3634a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber}
3644a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
3654a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Hubervoid CreateAlign(uint8 *ref, int picwidth, int y_pos,
3664a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                 uint8 *out, int blkwidth, int blkheight)
3674a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber{
3684a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    int i, j;
3694a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    int offset, out_offset;
3704a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    uint32 prev_pix, result, pix1, pix2, pix4;
3714a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
3724a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    out_offset = 24 - blkwidth;
3734a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
3744a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    //switch(x_pos&0x3){
3754a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    switch (((uint32)ref)&0x3)
3764a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    {
3774a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        case 1:
3784a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            ref += y_pos * picwidth;
3794a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            offset =  picwidth - blkwidth - 3;
3804a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            for (j = 0; j < blkheight; j++)
3814a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            {
3824a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                pix1 = *ref++;
3834a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                pix2 = *((uint16*)ref);
3844a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                ref += 2;
3854a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result = (pix2 << 8) | pix1;
3864a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
3874a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                for (i = 3; i < blkwidth; i += 4)
3884a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                {
3894a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    pix4 = *((uint32*)ref);
3904a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    ref += 4;
3914a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    prev_pix = (pix4 << 24) & 0xFF000000; /* mask out byte belong to previous word */
3924a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    result |= prev_pix;
3934a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    *((uint32*)out) = result;  /* write 4 bytes */
3944a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    out += 4;
3954a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    result = pix4 >> 8; /* for the next loop */
3964a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                }
3974a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                ref += offset;
3984a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                out += out_offset;
3994a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            }
4004a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            break;
4014a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        case 2:
4024a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            ref += y_pos * picwidth;
4034a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            offset =  picwidth - blkwidth - 2;
4044a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            for (j = 0; j < blkheight; j++)
4054a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            {
4064a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result = *((uint16*)ref);
4074a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                ref += 2;
4084a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                for (i = 2; i < blkwidth; i += 4)
4094a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                {
4104a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    pix4 = *((uint32*)ref);
4114a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    ref += 4;
4124a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    prev_pix = (pix4 << 16) & 0xFFFF0000; /* mask out byte belong to previous word */
4134a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    result |= prev_pix;
4144a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    *((uint32*)out) = result;  /* write 4 bytes */
4154a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    out += 4;
4164a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    result = pix4 >> 16; /* for the next loop */
4174a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                }
4184a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                ref += offset;
4194a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                out += out_offset;
4204a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            }
4214a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            break;
4224a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        case 3:
4234a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            ref += y_pos * picwidth;
4244a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            offset =  picwidth - blkwidth - 1;
4254a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            for (j = 0; j < blkheight; j++)
4264a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            {
4274a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result = *ref++;
4284a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                for (i = 1; i < blkwidth; i += 4)
4294a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                {
4304a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    pix4 = *((uint32*)ref);
4314a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    ref += 4;
4324a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    prev_pix = (pix4 << 8) & 0xFFFFFF00; /* mask out byte belong to previous word */
4334a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    result |= prev_pix;
4344a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    *((uint32*)out) = result;  /* write 4 bytes */
4354a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    out += 4;
4364a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    result = pix4 >> 24; /* for the next loop */
4374a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                }
4384a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                ref += offset;
4394a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                out += out_offset;
4404a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            }
4414a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            break;
4424a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    }
4434a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber}
4444a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
4454a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Hubervoid CreatePad(uint8 *ref, int picwidth, int picheight, int x_pos, int y_pos,
4464a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber               uint8 *out, int blkwidth, int blkheight)
4474a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber{
4484a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    int x_inc0, x_mid;
4494a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    int y_inc, y_inc0, y_inc1, y_mid;
4504a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    int i, j;
4514a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    int offset;
4524a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
4534a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    if (x_pos < 0)
4544a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    {
4554a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        x_inc0 = 0;  /* increment for the first part */
4564a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        x_mid = ((blkwidth + x_pos > 0) ? -x_pos : blkwidth);  /* stopping point */
4574a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        x_pos = 0;
4584a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    }
4594a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    else if (x_pos + blkwidth > picwidth)
4604a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    {
4614a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        x_inc0 = 1;  /* increasing */
4624a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        x_mid = ((picwidth > x_pos) ? picwidth - x_pos - 1 : 0);  /* clip negative to zero, encode fool proof! */
4634a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    }
4644a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    else    /* normal case */
4654a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    {
4664a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        x_inc0 = 1;
4674a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        x_mid = blkwidth; /* just one run */
4684a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    }
4694a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
4704a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
4714a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    /* boundary for y_pos, taking the result from x_pos into account */
4724a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    if (y_pos < 0)
4734a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    {
4744a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        y_inc0 = (x_inc0 ? - x_mid : -blkwidth + x_mid); /* offset depending on x_inc1 and x_inc0 */
4754a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        y_inc1 = picwidth + y_inc0;
4764a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        y_mid = ((blkheight + y_pos > 0) ? -y_pos : blkheight); /* clip to prevent memory corruption */
4774a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        y_pos = 0;
4784a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    }
4794a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    else  if (y_pos + blkheight > picheight)
4804a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    {
4814a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        y_inc1 = (x_inc0 ? - x_mid : -blkwidth + x_mid); /* saturate */
4824a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        y_inc0 = picwidth + y_inc1;                 /* increasing */
4834a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        y_mid = ((picheight > y_pos) ? picheight - 1 - y_pos : 0);
4844a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    }
4854a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    else  /* normal case */
4864a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    {
4874a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        y_inc1 = (x_inc0 ? - x_mid : -blkwidth + x_mid);
4884a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        y_inc0 = picwidth + y_inc1;
4894a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        y_mid = blkheight;
4904a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    }
4914a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
4924a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    /* clip y_pos and x_pos */
4934a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    if (y_pos > picheight - 1) y_pos = picheight - 1;
4944a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    if (x_pos > picwidth - 1) x_pos = picwidth - 1;
4954a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
4964a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    ref += y_pos * picwidth + x_pos;
4974a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
4984a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    y_inc = y_inc0;  /* start with top half */
4994a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
5004a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    offset = 24 - blkwidth; /* to use in offset out */
5014a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    blkwidth -= x_mid; /* to use in the loop limit */
5024a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
5034a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    if (x_inc0 == 0)
5044a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    {
5054a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        for (j = 0; j < blkheight; j++)
5064a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        {
5074a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            if (j == y_mid)  /* put a check here to reduce the code size (for unrolling the loop) */
5084a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            {
5094a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                y_inc = y_inc1;  /* switch to lower half */
5104a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            }
5114a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            for (i = x_mid; i > 0; i--)   /* first or third quarter */
5124a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            {
5134a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                *out++ = *ref;
5144a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            }
5154a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            for (i = blkwidth; i > 0; i--)  /* second or fourth quarter */
5164a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            {
5174a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                *out++ = *ref++;
5184a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            }
5194a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            out += offset;
5204a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            ref += y_inc;
5214a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        }
5224a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    }
5234a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    else
5244a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    {
5254a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        for (j = 0; j < blkheight; j++)
5264a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        {
5274a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            if (j == y_mid)  /* put a check here to reduce the code size (for unrolling the loop) */
5284a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            {
5294a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                y_inc = y_inc1;  /* switch to lower half */
5304a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            }
5314a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            for (i = x_mid; i > 0; i--)   /* first or third quarter */
5324a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            {
5334a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                *out++ = *ref++;
5344a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            }
5354a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            for (i = blkwidth; i > 0; i--)  /* second or fourth quarter */
5364a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            {
5374a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                *out++ = *ref;
5384a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            }
5394a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            out += offset;
5404a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            ref += y_inc;
5414a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        }
5424a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    }
5434a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
5444a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    return ;
5454a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber}
5464a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
5474a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Hubervoid HorzInterp1MC(uint8 *in, int inpitch, uint8 *out, int outpitch,
5484a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                   int blkwidth, int blkheight, int dx)
5494a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber{
5504a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    uint8 *p_ref;
5514a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    uint32 *p_cur;
5524a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    uint32 tmp, pkres;
5534a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    int result, curr_offset, ref_offset;
5544a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    int j;
5554a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    int32 r0, r1, r2, r3, r4, r5;
5564a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    int32 r13, r6;
5574a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
5584a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    p_cur = (uint32*)out; /* assume it's word aligned */
5594a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    curr_offset = (outpitch - blkwidth) >> 2;
5604a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    p_ref = in;
5614a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    ref_offset = inpitch - blkwidth;
5624a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
5634a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    if (dx&1)
5644a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    {
5654a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        dx = ((dx >> 1) ? -3 : -4); /* use in 3/4 pel */
5664a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        p_ref -= 2;
5674a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        r13 = 0;
5684a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        for (j = blkheight; j > 0; j--)
5694a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        {
5704a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            tmp = (uint32)(p_ref + blkwidth);
5714a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r0 = p_ref[0];
5724a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r1 = p_ref[2];
5734a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r0 |= (r1 << 16);           /* 0,c,0,a */
5744a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r1 = p_ref[1];
5754a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r2 = p_ref[3];
5764a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r1 |= (r2 << 16);           /* 0,d,0,b */
5774a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            while ((uint32)p_ref < tmp)
5784a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            {
5794a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r2 = *(p_ref += 4); /* move pointer to e */
5804a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r3 = p_ref[2];
5814a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r2 |= (r3 << 16);           /* 0,g,0,e */
5824a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r3 = p_ref[1];
5834a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r4 = p_ref[3];
5844a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r3 |= (r4 << 16);           /* 0,h,0,f */
5854a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
5864a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r4 = r0 + r3;       /* c+h, a+f */
5874a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r5 = r0 + r1;   /* c+d, a+b */
5884a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r6 = r2 + r3;   /* g+h, e+f */
5894a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r5 >>= 16;
5904a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r5 |= (r6 << 16);   /* e+f, c+d */
5914a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r4 += r5 * 20;      /* c+20*e+20*f+h, a+20*c+20*d+f */
5924a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r4 += 0x100010; /* +16, +16 */
5934a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r5 = r1 + r2;       /* d+g, b+e */
5944a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r4 -= r5 * 5;       /* c-5*d+20*e+20*f-5*g+h, a-5*b+20*c+20*d-5*e+f */
5954a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r4 >>= 5;
5964a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r13 |= r4;      /* check clipping */
5974a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
5984a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r5 = p_ref[dx+2];
5994a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r6 = p_ref[dx+4];
6004a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r5 |= (r6 << 16);
6014a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r4 += r5;
6024a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r4 += 0x10001;
6034a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r4 = (r4 >> 1) & 0xFF00FF;
6044a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
6054a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r5 = p_ref[4];  /* i */
6064a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r6 = (r5 << 16);
6074a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r5 = r6 | (r2 >> 16);/* 0,i,0,g */
6084a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r5 += r1;       /* d+i, b+g */ /* r5 not free */
6094a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r1 >>= 16;
6104a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r1 |= (r3 << 16); /* 0,f,0,d */ /* r1 has changed */
6114a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r1 += r2;       /* f+g, d+e */
6124a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r5 += 20 * r1;  /* d+20f+20g+i, b+20d+20e+g */
6134a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r0 >>= 16;
6144a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r0 |= (r2 << 16); /* 0,e,0,c */ /* r0 has changed */
6154a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r0 += r3;       /* e+h, c+f */
6164a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r5 += 0x100010; /* 16,16 */
6174a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r5 -= r0 * 5;       /* d-5e+20f+20g-5h+i, b-5c+20d+20e-5f+g */
6184a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r5 >>= 5;
6194a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r13 |= r5;      /* check clipping */
6204a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
6214a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r0 = p_ref[dx+3];
6224a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r1 = p_ref[dx+5];
6234a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r0 |= (r1 << 16);
6244a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r5 += r0;
6254a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r5 += 0x10001;
6264a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r5 = (r5 >> 1) & 0xFF00FF;
6274a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
6284a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r4 |= (r5 << 8);    /* pack them together */
6294a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                *p_cur++ = r4;
6304a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r1 = r3;
6314a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r0 = r2;
6324a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            }
6334a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            p_cur += curr_offset; /* move to the next line */
6344a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            p_ref += ref_offset;  /*    ref_offset = inpitch-blkwidth; */
6354a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
6364a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            if (r13&0xFF000700) /* need clipping */
6374a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            {
6384a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                /* move back to the beginning of the line */
6394a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                p_ref -= (ref_offset + blkwidth);   /* input */
6404a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                p_cur -= (outpitch >> 2);
6414a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
6424a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                tmp = (uint32)(p_ref + blkwidth);
6434a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                for (; (uint32)p_ref < tmp;)
6444a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                {
6454a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
6464a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    r0 = *p_ref++;
6474a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    r1 = *p_ref++;
6484a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    r2 = *p_ref++;
6494a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    r3 = *p_ref++;
6504a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    r4 = *p_ref++;
6514a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    /* first pixel */
6524a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    r5 = *p_ref++;
6534a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    result = (r0 + r5);
6544a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    r0 = (r1 + r4);
6554a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    result -= (r0 * 5);//result -= r0;  result -= (r0<<2);
6564a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    r0 = (r2 + r3);
6574a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    result += (r0 * 20);//result += (r0<<4);    result += (r0<<2);
6584a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    result = (result + 16) >> 5;
6594a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    CLIP_RESULT(result)
6604a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    /* 3/4 pel,  no need to clip */
6614a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    result = (result + p_ref[dx] + 1);
6624a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    pkres = (result >> 1) ;
6634a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    /* second pixel */
6644a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    r0 = *p_ref++;
6654a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    result = (r1 + r0);
6664a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    r1 = (r2 + r5);
6674a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    result -= (r1 * 5);//result -= r1;  result -= (r1<<2);
6684a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    r1 = (r3 + r4);
6694a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    result += (r1 * 20);//result += (r1<<4);    result += (r1<<2);
6704a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    result = (result + 16) >> 5;
6714a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    CLIP_RESULT(result)
6724a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    /* 3/4 pel,  no need to clip */
6734a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    result = (result + p_ref[dx] + 1);
6744a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    result = (result >> 1);
6754a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    pkres  |= (result << 8);
6764a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    /* third pixel */
6774a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    r1 = *p_ref++;
6784a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    result = (r2 + r1);
6794a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    r2 = (r3 + r0);
6804a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    result -= (r2 * 5);//result -= r2;  result -= (r2<<2);
6814a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    r2 = (r4 + r5);
6824a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    result += (r2 * 20);//result += (r2<<4);    result += (r2<<2);
6834a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    result = (result + 16) >> 5;
6844a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    CLIP_RESULT(result)
6854a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    /* 3/4 pel,  no need to clip */
6864a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    result = (result + p_ref[dx] + 1);
6874a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    result = (result >> 1);
6884a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    pkres  |= (result << 16);
6894a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    /* fourth pixel */
6904a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    r2 = *p_ref++;
6914a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    result = (r3 + r2);
6924a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    r3 = (r4 + r1);
6934a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    result -= (r3 * 5);//result -= r3;  result -= (r3<<2);
6944a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    r3 = (r5 + r0);
6954a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    result += (r3 * 20);//result += (r3<<4);    result += (r3<<2);
6964a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    result = (result + 16) >> 5;
6974a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    CLIP_RESULT(result)
6984a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    /* 3/4 pel,  no need to clip */
6994a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    result = (result + p_ref[dx] + 1);
7004a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    result = (result >> 1);
7014a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    pkres  |= (result << 24);
7024a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    *p_cur++ = pkres; /* write 4 pixels */
7034a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    p_ref -= 5;  /* offset back to the middle of filter */
7044a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                }
7054a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                p_cur += curr_offset;  /* move to the next line */
7064a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                p_ref += ref_offset;    /* move to the next line */
7074a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            }
7084a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        }
7094a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    }
7104a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    else
7114a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    {
7124a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        p_ref -= 2;
7134a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        r13 = 0;
7144a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        for (j = blkheight; j > 0; j--)
7154a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        {
7164a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            tmp = (uint32)(p_ref + blkwidth);
7174a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r0 = p_ref[0];
7184a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r1 = p_ref[2];
7194a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r0 |= (r1 << 16);           /* 0,c,0,a */
7204a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r1 = p_ref[1];
7214a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r2 = p_ref[3];
7224a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r1 |= (r2 << 16);           /* 0,d,0,b */
7234a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            while ((uint32)p_ref < tmp)
7244a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            {
7254a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r2 = *(p_ref += 4); /* move pointer to e */
7264a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r3 = p_ref[2];
7274a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r2 |= (r3 << 16);           /* 0,g,0,e */
7284a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r3 = p_ref[1];
7294a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r4 = p_ref[3];
7304a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r3 |= (r4 << 16);           /* 0,h,0,f */
7314a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
7324a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r4 = r0 + r3;       /* c+h, a+f */
7334a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r5 = r0 + r1;   /* c+d, a+b */
7344a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r6 = r2 + r3;   /* g+h, e+f */
7354a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r5 >>= 16;
7364a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r5 |= (r6 << 16);   /* e+f, c+d */
7374a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r4 += r5 * 20;      /* c+20*e+20*f+h, a+20*c+20*d+f */
7384a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r4 += 0x100010; /* +16, +16 */
7394a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r5 = r1 + r2;       /* d+g, b+e */
7404a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r4 -= r5 * 5;       /* c-5*d+20*e+20*f-5*g+h, a-5*b+20*c+20*d-5*e+f */
7414a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r4 >>= 5;
7424a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r13 |= r4;      /* check clipping */
7434a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r4 &= 0xFF00FF; /* mask */
7444a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
7454a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r5 = p_ref[4];  /* i */
7464a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r6 = (r5 << 16);
7474a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r5 = r6 | (r2 >> 16);/* 0,i,0,g */
7484a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r5 += r1;       /* d+i, b+g */ /* r5 not free */
7494a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r1 >>= 16;
7504a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r1 |= (r3 << 16); /* 0,f,0,d */ /* r1 has changed */
7514a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r1 += r2;       /* f+g, d+e */
7524a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r5 += 20 * r1;  /* d+20f+20g+i, b+20d+20e+g */
7534a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r0 >>= 16;
7544a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r0 |= (r2 << 16); /* 0,e,0,c */ /* r0 has changed */
7554a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r0 += r3;       /* e+h, c+f */
7564a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r5 += 0x100010; /* 16,16 */
7574a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r5 -= r0 * 5;       /* d-5e+20f+20g-5h+i, b-5c+20d+20e-5f+g */
7584a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r5 >>= 5;
7594a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r13 |= r5;      /* check clipping */
7604a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r5 &= 0xFF00FF; /* mask */
7614a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
7624a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r4 |= (r5 << 8);    /* pack them together */
7634a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                *p_cur++ = r4;
7644a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r1 = r3;
7654a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r0 = r2;
7664a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            }
7674a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            p_cur += curr_offset; /* move to the next line */
7684a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            p_ref += ref_offset;  /*    ref_offset = inpitch-blkwidth; */
7694a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
7704a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            if (r13&0xFF000700) /* need clipping */
7714a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            {
7724a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                /* move back to the beginning of the line */
7734a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                p_ref -= (ref_offset + blkwidth);   /* input */
7744a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                p_cur -= (outpitch >> 2);
7754a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
7764a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                tmp = (uint32)(p_ref + blkwidth);
7774a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                for (; (uint32)p_ref < tmp;)
7784a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                {
7794a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
7804a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    r0 = *p_ref++;
7814a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    r1 = *p_ref++;
7824a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    r2 = *p_ref++;
7834a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    r3 = *p_ref++;
7844a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    r4 = *p_ref++;
7854a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    /* first pixel */
7864a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    r5 = *p_ref++;
7874a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    result = (r0 + r5);
7884a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    r0 = (r1 + r4);
7894a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    result -= (r0 * 5);//result -= r0;  result -= (r0<<2);
7904a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    r0 = (r2 + r3);
7914a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    result += (r0 * 20);//result += (r0<<4);    result += (r0<<2);
7924a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    result = (result + 16) >> 5;
7934a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    CLIP_RESULT(result)
7944a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    pkres  = result;
7954a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    /* second pixel */
7964a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    r0 = *p_ref++;
7974a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    result = (r1 + r0);
7984a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    r1 = (r2 + r5);
7994a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    result -= (r1 * 5);//result -= r1;  result -= (r1<<2);
8004a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    r1 = (r3 + r4);
8014a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    result += (r1 * 20);//result += (r1<<4);    result += (r1<<2);
8024a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    result = (result + 16) >> 5;
8034a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    CLIP_RESULT(result)
8044a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    pkres  |= (result << 8);
8054a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    /* third pixel */
8064a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    r1 = *p_ref++;
8074a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    result = (r2 + r1);
8084a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    r2 = (r3 + r0);
8094a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    result -= (r2 * 5);//result -= r2;  result -= (r2<<2);
8104a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    r2 = (r4 + r5);
8114a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    result += (r2 * 20);//result += (r2<<4);    result += (r2<<2);
8124a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    result = (result + 16) >> 5;
8134a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    CLIP_RESULT(result)
8144a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    pkres  |= (result << 16);
8154a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    /* fourth pixel */
8164a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    r2 = *p_ref++;
8174a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    result = (r3 + r2);
8184a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    r3 = (r4 + r1);
8194a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    result -= (r3 * 5);//result -= r3;  result -= (r3<<2);
8204a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    r3 = (r5 + r0);
8214a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    result += (r3 * 20);//result += (r3<<4);    result += (r3<<2);
8224a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    result = (result + 16) >> 5;
8234a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    CLIP_RESULT(result)
8244a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    pkres  |= (result << 24);
8254a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    *p_cur++ = pkres;   /* write 4 pixels */
8264a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    p_ref -= 5;
8274a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                }
8284a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                p_cur += curr_offset; /* move to the next line */
8294a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                p_ref += ref_offset;
8304a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            }
8314a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        }
8324a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    }
8334a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
8344a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    return ;
8354a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber}
8364a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
8374a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Hubervoid HorzInterp2MC(int *in, int inpitch, uint8 *out, int outpitch,
8384a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                   int blkwidth, int blkheight, int dx)
8394a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber{
8404a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    int *p_ref;
8414a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    uint32 *p_cur;
8424a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    uint32 tmp, pkres;
8434a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    int result, result2, curr_offset, ref_offset;
8444a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    int j, r0, r1, r2, r3, r4, r5;
8454a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
8464a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    p_cur = (uint32*)out; /* assume it's word aligned */
8474a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    curr_offset = (outpitch - blkwidth) >> 2;
8484a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    p_ref = in;
8494a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    ref_offset = inpitch - blkwidth;
8504a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
8514a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    if (dx&1)
8524a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    {
8534a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        dx = ((dx >> 1) ? -3 : -4); /* use in 3/4 pel */
8544a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
8554a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        for (j = blkheight; j > 0 ; j--)
8564a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        {
8574a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            tmp = (uint32)(p_ref + blkwidth);
8584a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            for (; (uint32)p_ref < tmp;)
8594a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            {
8604a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
8614a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r0 = p_ref[-2];
8624a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r1 = p_ref[-1];
8634a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r2 = *p_ref++;
8644a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r3 = *p_ref++;
8654a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r4 = *p_ref++;
8664a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                /* first pixel */
8674a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r5 = *p_ref++;
8684a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result = (r0 + r5);
8694a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r0 = (r1 + r4);
8704a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result -= (r0 * 5);//result -= r0;  result -= (r0<<2);
8714a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r0 = (r2 + r3);
8724a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result += (r0 * 20);//result += (r0<<4);    result += (r0<<2);
8734a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result = (result + 512) >> 10;
8744a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                CLIP_RESULT(result)
8754a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result2 = ((p_ref[dx] + 16) >> 5);
8764a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                CLIP_RESULT(result2)
8774a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                /* 3/4 pel,  no need to clip */
8784a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result = (result + result2 + 1);
8794a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                pkres = (result >> 1);
8804a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                /* second pixel */
8814a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r0 = *p_ref++;
8824a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result = (r1 + r0);
8834a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r1 = (r2 + r5);
8844a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result -= (r1 * 5);//result -= r1;  result -= (r1<<2);
8854a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r1 = (r3 + r4);
8864a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result += (r1 * 20);//result += (r1<<4);    result += (r1<<2);
8874a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result = (result + 512) >> 10;
8884a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                CLIP_RESULT(result)
8894a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result2 = ((p_ref[dx] + 16) >> 5);
8904a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                CLIP_RESULT(result2)
8914a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                /* 3/4 pel,  no need to clip */
8924a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result = (result + result2 + 1);
8934a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result = (result >> 1);
8944a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                pkres  |= (result << 8);
8954a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                /* third pixel */
8964a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r1 = *p_ref++;
8974a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result = (r2 + r1);
8984a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r2 = (r3 + r0);
8994a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result -= (r2 * 5);//result -= r2;  result -= (r2<<2);
9004a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r2 = (r4 + r5);
9014a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result += (r2 * 20);//result += (r2<<4);    result += (r2<<2);
9024a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result = (result + 512) >> 10;
9034a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                CLIP_RESULT(result)
9044a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result2 = ((p_ref[dx] + 16) >> 5);
9054a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                CLIP_RESULT(result2)
9064a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                /* 3/4 pel,  no need to clip */
9074a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result = (result + result2 + 1);
9084a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result = (result >> 1);
9094a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                pkres  |= (result << 16);
9104a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                /* fourth pixel */
9114a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r2 = *p_ref++;
9124a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result = (r3 + r2);
9134a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r3 = (r4 + r1);
9144a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result -= (r3 * 5);//result -= r3;  result -= (r3<<2);
9154a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r3 = (r5 + r0);
9164a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result += (r3 * 20);//result += (r3<<4);    result += (r3<<2);
9174a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result = (result + 512) >> 10;
9184a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                CLIP_RESULT(result)
9194a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result2 = ((p_ref[dx] + 16) >> 5);
9204a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                CLIP_RESULT(result2)
9214a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                /* 3/4 pel,  no need to clip */
9224a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result = (result + result2 + 1);
9234a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result = (result >> 1);
9244a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                pkres  |= (result << 24);
9254a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                *p_cur++ = pkres; /* write 4 pixels */
9264a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                p_ref -= 3;  /* offset back to the middle of filter */
9274a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            }
9284a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            p_cur += curr_offset;  /* move to the next line */
9294a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            p_ref += ref_offset;    /* move to the next line */
9304a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        }
9314a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    }
9324a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    else
9334a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    {
9344a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        for (j = blkheight; j > 0 ; j--)
9354a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        {
9364a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            tmp = (uint32)(p_ref + blkwidth);
9374a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            for (; (uint32)p_ref < tmp;)
9384a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            {
9394a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
9404a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r0 = p_ref[-2];
9414a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r1 = p_ref[-1];
9424a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r2 = *p_ref++;
9434a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r3 = *p_ref++;
9444a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r4 = *p_ref++;
9454a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                /* first pixel */
9464a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r5 = *p_ref++;
9474a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result = (r0 + r5);
9484a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r0 = (r1 + r4);
9494a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result -= (r0 * 5);//result -= r0;  result -= (r0<<2);
9504a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r0 = (r2 + r3);
9514a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result += (r0 * 20);//result += (r0<<4);    result += (r0<<2);
9524a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result = (result + 512) >> 10;
9534a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                CLIP_RESULT(result)
9544a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                pkres  = result;
9554a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                /* second pixel */
9564a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r0 = *p_ref++;
9574a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result = (r1 + r0);
9584a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r1 = (r2 + r5);
9594a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result -= (r1 * 5);//result -= r1;  result -= (r1<<2);
9604a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r1 = (r3 + r4);
9614a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result += (r1 * 20);//result += (r1<<4);    result += (r1<<2);
9624a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result = (result + 512) >> 10;
9634a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                CLIP_RESULT(result)
9644a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                pkres  |= (result << 8);
9654a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                /* third pixel */
9664a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r1 = *p_ref++;
9674a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result = (r2 + r1);
9684a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r2 = (r3 + r0);
9694a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result -= (r2 * 5);//result -= r2;  result -= (r2<<2);
9704a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r2 = (r4 + r5);
9714a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result += (r2 * 20);//result += (r2<<4);    result += (r2<<2);
9724a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result = (result + 512) >> 10;
9734a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                CLIP_RESULT(result)
9744a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                pkres  |= (result << 16);
9754a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                /* fourth pixel */
9764a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r2 = *p_ref++;
9774a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result = (r3 + r2);
9784a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r3 = (r4 + r1);
9794a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result -= (r3 * 5);//result -= r3;  result -= (r3<<2);
9804a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r3 = (r5 + r0);
9814a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result += (r3 * 20);//result += (r3<<4);    result += (r3<<2);
9824a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result = (result + 512) >> 10;
9834a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                CLIP_RESULT(result)
9844a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                pkres  |= (result << 24);
9854a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                *p_cur++ = pkres; /* write 4 pixels */
9864a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                p_ref -= 3;  /* offset back to the middle of filter */
9874a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            }
9884a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            p_cur += curr_offset;  /* move to the next line */
9894a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            p_ref += ref_offset;    /* move to the next line */
9904a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        }
9914a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    }
9924a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
9934a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    return ;
9944a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber}
9954a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
9964a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Hubervoid HorzInterp3MC(uint8 *in, int inpitch, int *out, int outpitch,
9974a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                   int blkwidth, int blkheight)
9984a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber{
9994a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    uint8 *p_ref;
10004a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    int   *p_cur;
10014a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    uint32 tmp;
10024a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    int result, curr_offset, ref_offset;
10034a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    int j, r0, r1, r2, r3, r4, r5;
10044a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
10054a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    p_cur = out;
10064a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    curr_offset = (outpitch - blkwidth);
10074a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    p_ref = in;
10084a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    ref_offset = inpitch - blkwidth;
10094a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
10104a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    for (j = blkheight; j > 0 ; j--)
10114a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    {
10124a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        tmp = (uint32)(p_ref + blkwidth);
10134a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        for (; (uint32)p_ref < tmp;)
10144a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        {
10154a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
10164a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r0 = p_ref[-2];
10174a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r1 = p_ref[-1];
10184a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r2 = *p_ref++;
10194a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r3 = *p_ref++;
10204a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r4 = *p_ref++;
10214a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            /* first pixel */
10224a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r5 = *p_ref++;
10234a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            result = (r0 + r5);
10244a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r0 = (r1 + r4);
10254a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            result -= (r0 * 5);//result -= r0;  result -= (r0<<2);
10264a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r0 = (r2 + r3);
10274a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            result += (r0 * 20);//result += (r0<<4);    result += (r0<<2);
10284a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            *p_cur++ = result;
10294a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            /* second pixel */
10304a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r0 = *p_ref++;
10314a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            result = (r1 + r0);
10324a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r1 = (r2 + r5);
10334a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            result -= (r1 * 5);//result -= r1;  result -= (r1<<2);
10344a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r1 = (r3 + r4);
10354a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            result += (r1 * 20);//result += (r1<<4);    result += (r1<<2);
10364a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            *p_cur++ = result;
10374a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            /* third pixel */
10384a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r1 = *p_ref++;
10394a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            result = (r2 + r1);
10404a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r2 = (r3 + r0);
10414a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            result -= (r2 * 5);//result -= r2;  result -= (r2<<2);
10424a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r2 = (r4 + r5);
10434a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            result += (r2 * 20);//result += (r2<<4);    result += (r2<<2);
10444a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            *p_cur++ = result;
10454a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            /* fourth pixel */
10464a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r2 = *p_ref++;
10474a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            result = (r3 + r2);
10484a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r3 = (r4 + r1);
10494a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            result -= (r3 * 5);//result -= r3;  result -= (r3<<2);
10504a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r3 = (r5 + r0);
10514a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            result += (r3 * 20);//result += (r3<<4);    result += (r3<<2);
10524a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            *p_cur++ = result;
10534a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            p_ref -= 3; /* move back to the middle of the filter */
10544a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        }
10554a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        p_cur += curr_offset; /* move to the next line */
10564a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        p_ref += ref_offset;
10574a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    }
10584a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
10594a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    return ;
10604a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber}
10614a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Hubervoid VertInterp1MC(uint8 *in, int inpitch, uint8 *out, int outpitch,
10624a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                   int blkwidth, int blkheight, int dy)
10634a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber{
10644a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    uint8 *p_cur, *p_ref;
10654a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    uint32 tmp;
10664a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    int result, curr_offset, ref_offset;
10674a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    int j, i;
10684a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    int32 r0, r1, r2, r3, r4, r5, r6, r7, r8, r13;
10694a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    uint8  tmp_in[24][24];
10704a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
10714a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    /* not word-aligned */
10724a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    if (((uint32)in)&0x3)
10734a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    {
10744a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        CreateAlign(in, inpitch, -2, &tmp_in[0][0], blkwidth, blkheight + 5);
10754a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        in = &tmp_in[2][0];
10764a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        inpitch = 24;
10774a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    }
10784a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    p_cur = out;
10794a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    curr_offset = 1 - outpitch * (blkheight - 1); /* offset vertically back up and one pixel to right */
10804a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    ref_offset = blkheight * inpitch; /* for limit */
10814a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
10824a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    curr_offset += 3;
10834a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
10844a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    if (dy&1)
10854a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    {
10864a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        dy = (dy >> 1) ? 0 : -inpitch;
10874a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
10884a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        for (j = 0; j < blkwidth; j += 4, in += 4)
10894a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        {
10904a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r13 = 0;
10914a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            p_ref = in;
10924a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            p_cur -= outpitch;  /* compensate for the first offset */
10934a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            tmp = (uint32)(p_ref + ref_offset); /* limit */
10944a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            while ((uint32)p_ref < tmp)  /* the loop un-rolled  */
10954a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            {
10964a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r0 = *((uint32*)(p_ref - (inpitch << 1))); /* load 4 bytes */
10974a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                p_ref += inpitch;
10984a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r6 = (r0 >> 8) & 0xFF00FF; /* second and fourth byte */
10994a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r0 &= 0xFF00FF;
11004a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
11014a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r1 = *((uint32*)(p_ref + (inpitch << 1)));  /* r1, r7, ref[3] */
11024a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r7 = (r1 >> 8) & 0xFF00FF;
11034a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r1 &= 0xFF00FF;
11044a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
11054a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r0 += r1;
11064a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r6 += r7;
11074a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
11084a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r2 = *((uint32*)p_ref); /* r2, r8, ref[1] */
11094a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r8 = (r2 >> 8) & 0xFF00FF;
11104a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r2 &= 0xFF00FF;
11114a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
11124a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r1 = *((uint32*)(p_ref - inpitch)); /* r1, r7, ref[0] */
11134a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r7 = (r1 >> 8) & 0xFF00FF;
11144a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r1 &= 0xFF00FF;
11154a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r1 += r2;
11164a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
11174a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r7 += r8;
11184a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
11194a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r0 += 20 * r1;
11204a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r6 += 20 * r7;
11214a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r0 += 0x100010;
11224a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r6 += 0x100010;
11234a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
11244a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r2 = *((uint32*)(p_ref - (inpitch << 1))); /* r2, r8, ref[-1] */
11254a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r8 = (r2 >> 8) & 0xFF00FF;
11264a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r2 &= 0xFF00FF;
11274a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
11284a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r1 = *((uint32*)(p_ref + inpitch)); /* r1, r7, ref[2] */
11294a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r7 = (r1 >> 8) & 0xFF00FF;
11304a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r1 &= 0xFF00FF;
11314a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r1 += r2;
11324a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
11334a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r7 += r8;
11344a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
11354a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r0 -= 5 * r1;
11364a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r6 -= 5 * r7;
11374a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
11384a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r0 >>= 5;
11394a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r6 >>= 5;
11404a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                /* clip */
11414a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r13 |= r6;
11424a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r13 |= r0;
11434a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                //CLIPPACK(r6,result)
11444a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
11454a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r1 = *((uint32*)(p_ref + dy));
11464a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r2 = (r1 >> 8) & 0xFF00FF;
11474a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r1 &= 0xFF00FF;
11484a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r0 += r1;
11494a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r6 += r2;
11504a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r0 += 0x10001;
11514a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r6 += 0x10001;
11524a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r0 = (r0 >> 1) & 0xFF00FF;
11534a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r6 = (r6 >> 1) & 0xFF00FF;
11544a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
11554a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r0 |= (r6 << 8);  /* pack it back */
11564a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                *((uint32*)(p_cur += outpitch)) = r0;
11574a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            }
11584a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            p_cur += curr_offset; /* offset to the next pixel */
11594a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            if (r13 & 0xFF000700) /* this column need clipping */
11604a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            {
11614a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                p_cur -= 4;
11624a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                for (i = 0; i < 4; i++)
11634a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                {
11644a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    p_ref = in + i;
11654a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    p_cur -= outpitch;  /* compensate for the first offset */
11664a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
11674a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    tmp = (uint32)(p_ref + ref_offset); /* limit */
11684a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    while ((uint32)p_ref < tmp)
11694a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    {                           /* loop un-rolled */
11704a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        r0 = *(p_ref - (inpitch << 1));
11714a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        r1 = *(p_ref - inpitch);
11724a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        r2 = *p_ref;
11734a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        r3 = *(p_ref += inpitch);  /* modify pointer before loading */
11744a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        r4 = *(p_ref += inpitch);
11754a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        /* first pixel */
11764a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        r5 = *(p_ref += inpitch);
11774a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        result = (r0 + r5);
11784a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        r0 = (r1 + r4);
11794a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        result -= (r0 * 5);//result -= r0;  result -= (r0<<2);
11804a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        r0 = (r2 + r3);
11814a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        result += (r0 * 20);//result += (r0<<4);    result += (r0<<2);
11824a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        result = (result + 16) >> 5;
11834a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        CLIP_RESULT(result)
11844a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        /* 3/4 pel,  no need to clip */
11854a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        result = (result + p_ref[dy-(inpitch<<1)] + 1);
11864a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        result = (result >> 1);
11874a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        *(p_cur += outpitch) = result;
11884a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        /* second pixel */
11894a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        r0 = *(p_ref += inpitch);
11904a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        result = (r1 + r0);
11914a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        r1 = (r2 + r5);
11924a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        result -= (r1 * 5);//result -= r1;  result -= (r1<<2);
11934a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        r1 = (r3 + r4);
11944a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        result += (r1 * 20);//result += (r1<<4);    result += (r1<<2);
11954a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        result = (result + 16) >> 5;
11964a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        CLIP_RESULT(result)
11974a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        /* 3/4 pel,  no need to clip */
11984a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        result = (result + p_ref[dy-(inpitch<<1)] + 1);
11994a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        result = (result >> 1);
12004a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        *(p_cur += outpitch) = result;
12014a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        /* third pixel */
12024a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        r1 = *(p_ref += inpitch);
12034a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        result = (r2 + r1);
12044a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        r2 = (r3 + r0);
12054a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        result -= (r2 * 5);//result -= r2;  result -= (r2<<2);
12064a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        r2 = (r4 + r5);
12074a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        result += (r2 * 20);//result += (r2<<4);    result += (r2<<2);
12084a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        result = (result + 16) >> 5;
12094a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        CLIP_RESULT(result)
12104a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        /* 3/4 pel,  no need to clip */
12114a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        result = (result + p_ref[dy-(inpitch<<1)] + 1);
12124a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        result = (result >> 1);
12134a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        *(p_cur += outpitch) = result;
12144a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        /* fourth pixel */
12154a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        r2 = *(p_ref += inpitch);
12164a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        result = (r3 + r2);
12174a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        r3 = (r4 + r1);
12184a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        result -= (r3 * 5);//result -= r3;  result -= (r3<<2);
12194a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        r3 = (r5 + r0);
12204a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        result += (r3 * 20);//result += (r3<<4);    result += (r3<<2);
12214a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        result = (result + 16) >> 5;
12224a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        CLIP_RESULT(result)
12234a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        /* 3/4 pel,  no need to clip */
12244a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        result = (result + p_ref[dy-(inpitch<<1)] + 1);
12254a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        result = (result >> 1);
12264a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        *(p_cur += outpitch) = result;
12274a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        p_ref -= (inpitch << 1);  /* move back to center of the filter of the next one */
12284a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    }
12294a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    p_cur += (curr_offset - 3);
12304a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                }
12314a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            }
12324a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        }
12334a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    }
12344a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    else
12354a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    {
12364a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        for (j = 0; j < blkwidth; j += 4, in += 4)
12374a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        {
12384a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r13 = 0;
12394a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            p_ref = in;
12404a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            p_cur -= outpitch;  /* compensate for the first offset */
12414a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            tmp = (uint32)(p_ref + ref_offset); /* limit */
12424a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            while ((uint32)p_ref < tmp)  /* the loop un-rolled  */
12434a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            {
12444a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r0 = *((uint32*)(p_ref - (inpitch << 1))); /* load 4 bytes */
12454a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                p_ref += inpitch;
12464a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r6 = (r0 >> 8) & 0xFF00FF; /* second and fourth byte */
12474a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r0 &= 0xFF00FF;
12484a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
12494a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r1 = *((uint32*)(p_ref + (inpitch << 1)));  /* r1, r7, ref[3] */
12504a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r7 = (r1 >> 8) & 0xFF00FF;
12514a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r1 &= 0xFF00FF;
12524a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
12534a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r0 += r1;
12544a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r6 += r7;
12554a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
12564a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r2 = *((uint32*)p_ref); /* r2, r8, ref[1] */
12574a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r8 = (r2 >> 8) & 0xFF00FF;
12584a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r2 &= 0xFF00FF;
12594a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
12604a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r1 = *((uint32*)(p_ref - inpitch)); /* r1, r7, ref[0] */
12614a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r7 = (r1 >> 8) & 0xFF00FF;
12624a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r1 &= 0xFF00FF;
12634a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r1 += r2;
12644a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
12654a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r7 += r8;
12664a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
12674a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r0 += 20 * r1;
12684a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r6 += 20 * r7;
12694a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r0 += 0x100010;
12704a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r6 += 0x100010;
12714a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
12724a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r2 = *((uint32*)(p_ref - (inpitch << 1))); /* r2, r8, ref[-1] */
12734a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r8 = (r2 >> 8) & 0xFF00FF;
12744a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r2 &= 0xFF00FF;
12754a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
12764a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r1 = *((uint32*)(p_ref + inpitch)); /* r1, r7, ref[2] */
12774a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r7 = (r1 >> 8) & 0xFF00FF;
12784a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r1 &= 0xFF00FF;
12794a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r1 += r2;
12804a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
12814a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r7 += r8;
12824a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
12834a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r0 -= 5 * r1;
12844a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r6 -= 5 * r7;
12854a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
12864a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r0 >>= 5;
12874a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r6 >>= 5;
12884a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                /* clip */
12894a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r13 |= r6;
12904a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r13 |= r0;
12914a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                //CLIPPACK(r6,result)
12924a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r0 &= 0xFF00FF;
12934a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r6 &= 0xFF00FF;
12944a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r0 |= (r6 << 8);  /* pack it back */
12954a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                *((uint32*)(p_cur += outpitch)) = r0;
12964a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            }
12974a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            p_cur += curr_offset; /* offset to the next pixel */
12984a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            if (r13 & 0xFF000700) /* this column need clipping */
12994a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            {
13004a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                p_cur -= 4;
13014a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                for (i = 0; i < 4; i++)
13024a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                {
13034a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    p_ref = in + i;
13044a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    p_cur -= outpitch;  /* compensate for the first offset */
13054a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    tmp = (uint32)(p_ref + ref_offset); /* limit */
13064a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    while ((uint32)p_ref < tmp)
13074a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    {                           /* loop un-rolled */
13084a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        r0 = *(p_ref - (inpitch << 1));
13094a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        r1 = *(p_ref - inpitch);
13104a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        r2 = *p_ref;
13114a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        r3 = *(p_ref += inpitch);  /* modify pointer before loading */
13124a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        r4 = *(p_ref += inpitch);
13134a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        /* first pixel */
13144a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        r5 = *(p_ref += inpitch);
13154a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        result = (r0 + r5);
13164a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        r0 = (r1 + r4);
13174a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        result -= (r0 * 5);//result -= r0;  result -= (r0<<2);
13184a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        r0 = (r2 + r3);
13194a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        result += (r0 * 20);//result += (r0<<4);    result += (r0<<2);
13204a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        result = (result + 16) >> 5;
13214a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        CLIP_RESULT(result)
13224a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        *(p_cur += outpitch) = result;
13234a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        /* second pixel */
13244a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        r0 = *(p_ref += inpitch);
13254a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        result = (r1 + r0);
13264a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        r1 = (r2 + r5);
13274a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        result -= (r1 * 5);//result -= r1;  result -= (r1<<2);
13284a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        r1 = (r3 + r4);
13294a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        result += (r1 * 20);//result += (r1<<4);    result += (r1<<2);
13304a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        result = (result + 16) >> 5;
13314a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        CLIP_RESULT(result)
13324a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        *(p_cur += outpitch) = result;
13334a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        /* third pixel */
13344a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        r1 = *(p_ref += inpitch);
13354a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        result = (r2 + r1);
13364a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        r2 = (r3 + r0);
13374a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        result -= (r2 * 5);//result -= r2;  result -= (r2<<2);
13384a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        r2 = (r4 + r5);
13394a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        result += (r2 * 20);//result += (r2<<4);    result += (r2<<2);
13404a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        result = (result + 16) >> 5;
13414a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        CLIP_RESULT(result)
13424a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        *(p_cur += outpitch) = result;
13434a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        /* fourth pixel */
13444a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        r2 = *(p_ref += inpitch);
13454a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        result = (r3 + r2);
13464a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        r3 = (r4 + r1);
13474a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        result -= (r3 * 5);//result -= r3;  result -= (r3<<2);
13484a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        r3 = (r5 + r0);
13494a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        result += (r3 * 20);//result += (r3<<4);    result += (r3<<2);
13504a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        result = (result + 16) >> 5;
13514a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        CLIP_RESULT(result)
13524a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        *(p_cur += outpitch) = result;
13534a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                        p_ref -= (inpitch << 1);  /* move back to center of the filter of the next one */
13544a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    }
13554a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    p_cur += (curr_offset - 3);
13564a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                }
13574a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            }
13584a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        }
13594a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    }
13604a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
13614a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    return ;
13624a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber}
13634a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
13644a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Hubervoid VertInterp2MC(uint8 *in, int inpitch, int *out, int outpitch,
13654a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                   int blkwidth, int blkheight)
13664a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber{
13674a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    int *p_cur;
13684a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    uint8 *p_ref;
13694a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    uint32 tmp;
13704a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    int result, curr_offset, ref_offset;
13714a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    int j, r0, r1, r2, r3, r4, r5;
13724a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
13734a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    p_cur = out;
13744a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    curr_offset = 1 - outpitch * (blkheight - 1); /* offset vertically back up and one pixel to right */
13754a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    ref_offset = blkheight * inpitch; /* for limit */
13764a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
13774a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    for (j = 0; j < blkwidth; j++)
13784a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    {
13794a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        p_cur -= outpitch; /* compensate for the first offset */
13804a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        p_ref = in++;
13814a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
13824a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        tmp = (uint32)(p_ref + ref_offset); /* limit */
13834a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        while ((uint32)p_ref < tmp)
13844a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        {                           /* loop un-rolled */
13854a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r0 = *(p_ref - (inpitch << 1));
13864a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r1 = *(p_ref - inpitch);
13874a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r2 = *p_ref;
13884a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r3 = *(p_ref += inpitch);  /* modify pointer before loading */
13894a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r4 = *(p_ref += inpitch);
13904a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            /* first pixel */
13914a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r5 = *(p_ref += inpitch);
13924a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            result = (r0 + r5);
13934a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r0 = (r1 + r4);
13944a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            result -= (r0 * 5);//result -= r0;  result -= (r0<<2);
13954a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r0 = (r2 + r3);
13964a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            result += (r0 * 20);//result += (r0<<4);    result += (r0<<2);
13974a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            *(p_cur += outpitch) = result;
13984a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            /* second pixel */
13994a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r0 = *(p_ref += inpitch);
14004a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            result = (r1 + r0);
14014a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r1 = (r2 + r5);
14024a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            result -= (r1 * 5);//result -= r1;  result -= (r1<<2);
14034a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r1 = (r3 + r4);
14044a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            result += (r1 * 20);//result += (r1<<4);    result += (r1<<2);
14054a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            *(p_cur += outpitch) = result;
14064a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            /* third pixel */
14074a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r1 = *(p_ref += inpitch);
14084a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            result = (r2 + r1);
14094a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r2 = (r3 + r0);
14104a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            result -= (r2 * 5);//result -= r2;  result -= (r2<<2);
14114a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r2 = (r4 + r5);
14124a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            result += (r2 * 20);//result += (r2<<4);    result += (r2<<2);
14134a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            *(p_cur += outpitch) = result;
14144a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            /* fourth pixel */
14154a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r2 = *(p_ref += inpitch);
14164a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            result = (r3 + r2);
14174a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r3 = (r4 + r1);
14184a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            result -= (r3 * 5);//result -= r3;  result -= (r3<<2);
14194a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r3 = (r5 + r0);
14204a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            result += (r3 * 20);//result += (r3<<4);    result += (r3<<2);
14214a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            *(p_cur += outpitch) = result;
14224a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            p_ref -= (inpitch << 1);  /* move back to center of the filter of the next one */
14234a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        }
14244a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        p_cur += curr_offset;
14254a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    }
14264a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
14274a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    return ;
14284a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber}
14294a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
14304a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Hubervoid VertInterp3MC(int *in, int inpitch, uint8 *out, int outpitch,
14314a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                   int blkwidth, int blkheight, int dy)
14324a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber{
14334a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    uint8 *p_cur;
14344a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    int *p_ref;
14354a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    uint32 tmp;
14364a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    int result, result2, curr_offset, ref_offset;
14374a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    int j, r0, r1, r2, r3, r4, r5;
14384a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
14394a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    p_cur = out;
14404a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    curr_offset = 1 - outpitch * (blkheight - 1); /* offset vertically back up and one pixel to right */
14414a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    ref_offset = blkheight * inpitch; /* for limit */
14424a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
14434a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    if (dy&1)
14444a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    {
14454a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        dy = (dy >> 1) ? -(inpitch << 1) : -(inpitch << 1) - inpitch;
14464a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
14474a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        for (j = 0; j < blkwidth; j++)
14484a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        {
14494a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            p_cur -= outpitch; /* compensate for the first offset */
14504a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            p_ref = in++;
14514a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
14524a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            tmp = (uint32)(p_ref + ref_offset); /* limit */
14534a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            while ((uint32)p_ref < tmp)
14544a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            {                           /* loop un-rolled */
14554a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r0 = *(p_ref - (inpitch << 1));
14564a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r1 = *(p_ref - inpitch);
14574a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r2 = *p_ref;
14584a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r3 = *(p_ref += inpitch);  /* modify pointer before loading */
14594a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r4 = *(p_ref += inpitch);
14604a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                /* first pixel */
14614a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r5 = *(p_ref += inpitch);
14624a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result = (r0 + r5);
14634a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r0 = (r1 + r4);
14644a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result -= (r0 * 5);//result -= r0;  result -= (r0<<2);
14654a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r0 = (r2 + r3);
14664a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result += (r0 * 20);//result += (r0<<4);    result += (r0<<2);
14674a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result = (result + 512) >> 10;
14684a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                CLIP_RESULT(result)
14694a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result2 = ((p_ref[dy] + 16) >> 5);
14704a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                CLIP_RESULT(result2)
14714a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                /* 3/4 pel,  no need to clip */
14724a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result = (result + result2 + 1);
14734a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result = (result >> 1);
14744a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                *(p_cur += outpitch) = result;
14754a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                /* second pixel */
14764a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r0 = *(p_ref += inpitch);
14774a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result = (r1 + r0);
14784a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r1 = (r2 + r5);
14794a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result -= (r1 * 5);//result -= r1;  result -= (r1<<2);
14804a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r1 = (r3 + r4);
14814a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result += (r1 * 20);//result += (r1<<4);    result += (r1<<2);
14824a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result = (result + 512) >> 10;
14834a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                CLIP_RESULT(result)
14844a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result2 = ((p_ref[dy] + 16) >> 5);
14854a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                CLIP_RESULT(result2)
14864a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                /* 3/4 pel,  no need to clip */
14874a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result = (result + result2 + 1);
14884a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result = (result >> 1);
14894a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                *(p_cur += outpitch) = result;
14904a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                /* third pixel */
14914a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r1 = *(p_ref += inpitch);
14924a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result = (r2 + r1);
14934a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r2 = (r3 + r0);
14944a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result -= (r2 * 5);//result -= r2;  result -= (r2<<2);
14954a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r2 = (r4 + r5);
14964a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result += (r2 * 20);//result += (r2<<4);    result += (r2<<2);
14974a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result = (result + 512) >> 10;
14984a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                CLIP_RESULT(result)
14994a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result2 = ((p_ref[dy] + 16) >> 5);
15004a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                CLIP_RESULT(result2)
15014a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                /* 3/4 pel,  no need to clip */
15024a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result = (result + result2 + 1);
15034a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result = (result >> 1);
15044a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                *(p_cur += outpitch) = result;
15054a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                /* fourth pixel */
15064a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r2 = *(p_ref += inpitch);
15074a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result = (r3 + r2);
15084a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r3 = (r4 + r1);
15094a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result -= (r3 * 5);//result -= r3;  result -= (r3<<2);
15104a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r3 = (r5 + r0);
15114a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result += (r3 * 20);//result += (r3<<4);    result += (r3<<2);
15124a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result = (result + 512) >> 10;
15134a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                CLIP_RESULT(result)
15144a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result2 = ((p_ref[dy] + 16) >> 5);
15154a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                CLIP_RESULT(result2)
15164a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                /* 3/4 pel,  no need to clip */
15174a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result = (result + result2 + 1);
15184a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result = (result >> 1);
15194a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                *(p_cur += outpitch) = result;
15204a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                p_ref -= (inpitch << 1);  /* move back to center of the filter of the next one */
15214a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            }
15224a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            p_cur += curr_offset;
15234a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        }
15244a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    }
15254a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    else
15264a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    {
15274a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        for (j = 0; j < blkwidth; j++)
15284a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        {
15294a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            p_cur -= outpitch; /* compensate for the first offset */
15304a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            p_ref = in++;
15314a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
15324a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            tmp = (uint32)(p_ref + ref_offset); /* limit */
15334a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            while ((uint32)p_ref < tmp)
15344a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            {                           /* loop un-rolled */
15354a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r0 = *(p_ref - (inpitch << 1));
15364a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r1 = *(p_ref - inpitch);
15374a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r2 = *p_ref;
15384a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r3 = *(p_ref += inpitch);  /* modify pointer before loading */
15394a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r4 = *(p_ref += inpitch);
15404a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                /* first pixel */
15414a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r5 = *(p_ref += inpitch);
15424a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result = (r0 + r5);
15434a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r0 = (r1 + r4);
15444a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result -= (r0 * 5);//result -= r0;  result -= (r0<<2);
15454a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r0 = (r2 + r3);
15464a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result += (r0 * 20);//result += (r0<<4);    result += (r0<<2);
15474a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result = (result + 512) >> 10;
15484a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                CLIP_RESULT(result)
15494a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                *(p_cur += outpitch) = result;
15504a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                /* second pixel */
15514a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r0 = *(p_ref += inpitch);
15524a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result = (r1 + r0);
15534a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r1 = (r2 + r5);
15544a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result -= (r1 * 5);//result -= r1;  result -= (r1<<2);
15554a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r1 = (r3 + r4);
15564a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result += (r1 * 20);//result += (r1<<4);    result += (r1<<2);
15574a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result = (result + 512) >> 10;
15584a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                CLIP_RESULT(result)
15594a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                *(p_cur += outpitch) = result;
15604a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                /* third pixel */
15614a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r1 = *(p_ref += inpitch);
15624a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result = (r2 + r1);
15634a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r2 = (r3 + r0);
15644a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result -= (r2 * 5);//result -= r2;  result -= (r2<<2);
15654a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r2 = (r4 + r5);
15664a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result += (r2 * 20);//result += (r2<<4);    result += (r2<<2);
15674a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result = (result + 512) >> 10;
15684a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                CLIP_RESULT(result)
15694a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                *(p_cur += outpitch) = result;
15704a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                /* fourth pixel */
15714a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r2 = *(p_ref += inpitch);
15724a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result = (r3 + r2);
15734a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r3 = (r4 + r1);
15744a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result -= (r3 * 5);//result -= r3;  result -= (r3<<2);
15754a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r3 = (r5 + r0);
15764a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result += (r3 * 20);//result += (r3<<4);    result += (r3<<2);
15774a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result = (result + 512) >> 10;
15784a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                CLIP_RESULT(result)
15794a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                *(p_cur += outpitch) = result;
15804a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                p_ref -= (inpitch << 1);  /* move back to center of the filter of the next one */
15814a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            }
15824a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            p_cur += curr_offset;
15834a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        }
15844a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    }
15854a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
15864a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    return ;
15874a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber}
15884a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
15894a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Hubervoid DiagonalInterpMC(uint8 *in1, uint8 *in2, int inpitch,
15904a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                      uint8 *out, int outpitch,
15914a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                      int blkwidth, int blkheight)
15924a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber{
15934a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    int j, i;
15944a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    int result;
15954a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    uint8 *p_cur, *p_ref, *p_tmp8;
15964a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    int curr_offset, ref_offset;
15974a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    uint8 tmp_res[24][24], tmp_in[24][24];
15984a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    uint32 *p_tmp;
15994a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    uint32 tmp, pkres, tmp_result;
16004a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    int32 r0, r1, r2, r3, r4, r5;
16014a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    int32 r6, r7, r8, r9, r10, r13;
16024a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
16034a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    ref_offset = inpitch - blkwidth;
16044a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    p_ref = in1 - 2;
16054a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    /* perform horizontal interpolation */
16064a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    /* not word-aligned */
16074a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    /* It is faster to read 1 byte at time to avoid calling CreateAlign */
16084a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    /*  if(((uint32)p_ref)&0x3)
16094a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        {
16104a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            CreateAlign(p_ref,inpitch,0,&tmp_in[0][0],blkwidth+8,blkheight);
16114a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            p_ref = &tmp_in[0][0];
16124a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            ref_offset = 24-blkwidth;
16134a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        }*/
16144a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
16154a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    p_tmp = (uint32*) & (tmp_res[0][0]);
16164a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    for (j = blkheight; j > 0; j--)
16174a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    {
16184a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        r13 = 0;
16194a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        tmp = (uint32)(p_ref + blkwidth);
16204a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
16214a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        //r0 = *((uint32*)p_ref);   /* d,c,b,a */
16224a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        //r1 = (r0>>8)&0xFF00FF;    /* 0,d,0,b */
16234a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        //r0 &= 0xFF00FF;           /* 0,c,0,a */
16244a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        /* It is faster to read 1 byte at a time,  */
16254a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        r0 = p_ref[0];
16264a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        r1 = p_ref[2];
16274a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        r0 |= (r1 << 16);           /* 0,c,0,a */
16284a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        r1 = p_ref[1];
16294a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        r2 = p_ref[3];
16304a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        r1 |= (r2 << 16);           /* 0,d,0,b */
16314a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
16324a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        while ((uint32)p_ref < tmp)
16334a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        {
16344a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            //r2 = *((uint32*)(p_ref+=4));/* h,g,f,e */
16354a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            //r3 = (r2>>8)&0xFF00FF;  /* 0,h,0,f */
16364a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            //r2 &= 0xFF00FF;           /* 0,g,0,e */
16374a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            /* It is faster to read 1 byte at a time,  */
16384a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r2 = *(p_ref += 4);
16394a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r3 = p_ref[2];
16404a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r2 |= (r3 << 16);           /* 0,g,0,e */
16414a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r3 = p_ref[1];
16424a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r4 = p_ref[3];
16434a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r3 |= (r4 << 16);           /* 0,h,0,f */
16444a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
16454a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r4 = r0 + r3;       /* c+h, a+f */
16464a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r5 = r0 + r1;   /* c+d, a+b */
16474a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r6 = r2 + r3;   /* g+h, e+f */
16484a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r5 >>= 16;
16494a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r5 |= (r6 << 16);   /* e+f, c+d */
16504a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r4 += r5 * 20;      /* c+20*e+20*f+h, a+20*c+20*d+f */
16514a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r4 += 0x100010; /* +16, +16 */
16524a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r5 = r1 + r2;       /* d+g, b+e */
16534a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r4 -= r5 * 5;       /* c-5*d+20*e+20*f-5*g+h, a-5*b+20*c+20*d-5*e+f */
16544a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r4 >>= 5;
16554a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r13 |= r4;      /* check clipping */
16564a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r4 &= 0xFF00FF; /* mask */
16574a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
16584a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r5 = p_ref[4];  /* i */
16594a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r6 = (r5 << 16);
16604a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r5 = r6 | (r2 >> 16);/* 0,i,0,g */
16614a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r5 += r1;       /* d+i, b+g */ /* r5 not free */
16624a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r1 >>= 16;
16634a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r1 |= (r3 << 16); /* 0,f,0,d */ /* r1 has changed */
16644a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r1 += r2;       /* f+g, d+e */
16654a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r5 += 20 * r1;  /* d+20f+20g+i, b+20d+20e+g */
16664a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r0 >>= 16;
16674a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r0 |= (r2 << 16); /* 0,e,0,c */ /* r0 has changed */
16684a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r0 += r3;       /* e+h, c+f */
16694a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r5 += 0x100010; /* 16,16 */
16704a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r5 -= r0 * 5;       /* d-5e+20f+20g-5h+i, b-5c+20d+20e-5f+g */
16714a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r5 >>= 5;
16724a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r13 |= r5;      /* check clipping */
16734a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r5 &= 0xFF00FF; /* mask */
16744a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
16754a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r4 |= (r5 << 8);    /* pack them together */
16764a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            *p_tmp++ = r4;
16774a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r1 = r3;
16784a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r0 = r2;
16794a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        }
16804a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        p_tmp += ((24 - blkwidth) >> 2); /* move to the next line */
16814a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        p_ref += ref_offset;  /*    ref_offset = inpitch-blkwidth; */
16824a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
16834a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        if (r13&0xFF000700) /* need clipping */
16844a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        {
16854a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            /* move back to the beginning of the line */
16864a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            p_ref -= (ref_offset + blkwidth);   /* input */
16874a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            p_tmp -= 6; /* intermediate output */
16884a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            tmp = (uint32)(p_ref + blkwidth);
16894a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            while ((uint32)p_ref < tmp)
16904a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            {
16914a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r0 = *p_ref++;
16924a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r1 = *p_ref++;
16934a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r2 = *p_ref++;
16944a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r3 = *p_ref++;
16954a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r4 = *p_ref++;
16964a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                /* first pixel */
16974a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r5 = *p_ref++;
16984a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result = (r0 + r5);
16994a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r0 = (r1 + r4);
17004a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result -= (r0 * 5);//result -= r0;  result -= (r0<<2);
17014a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r0 = (r2 + r3);
17024a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result += (r0 * 20);//result += (r0<<4);    result += (r0<<2);
17034a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result = (result + 16) >> 5;
17044a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                CLIP_RESULT(result)
17054a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                pkres = result;
17064a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                /* second pixel */
17074a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r0 = *p_ref++;
17084a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result = (r1 + r0);
17094a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r1 = (r2 + r5);
17104a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result -= (r1 * 5);//result -= r1;  result -= (r1<<2);
17114a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r1 = (r3 + r4);
17124a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result += (r1 * 20);//result += (r1<<4);    result += (r1<<2);
17134a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result = (result + 16) >> 5;
17144a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                CLIP_RESULT(result)
17154a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                pkres |= (result << 8);
17164a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                /* third pixel */
17174a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r1 = *p_ref++;
17184a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result = (r2 + r1);
17194a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r2 = (r3 + r0);
17204a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result -= (r2 * 5);//result -= r2;  result -= (r2<<2);
17214a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r2 = (r4 + r5);
17224a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result += (r2 * 20);//result += (r2<<4);    result += (r2<<2);
17234a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result = (result + 16) >> 5;
17244a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                CLIP_RESULT(result)
17254a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                pkres |= (result << 16);
17264a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                /* fourth pixel */
17274a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r2 = *p_ref++;
17284a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result = (r3 + r2);
17294a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r3 = (r4 + r1);
17304a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result -= (r3 * 5);//result -= r3;  result -= (r3<<2);
17314a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                r3 = (r5 + r0);
17324a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result += (r3 * 20);//result += (r3<<4);    result += (r3<<2);
17334a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                result = (result + 16) >> 5;
17344a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                CLIP_RESULT(result)
17354a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                pkres |= (result << 24);
17364a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
17374a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                *p_tmp++ = pkres; /* write 4 pixel */
17384a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                p_ref -= 5;
17394a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            }
17404a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            p_tmp += ((24 - blkwidth) >> 2); /* move to the next line */
17414a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            p_ref += ref_offset;  /*    ref_offset = inpitch-blkwidth; */
17424a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        }
17434a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    }
17444a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
17454a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    /*  perform vertical interpolation */
17464a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    /* not word-aligned */
17474a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    if (((uint32)in2)&0x3)
17484a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    {
17494a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        CreateAlign(in2, inpitch, -2, &tmp_in[0][0], blkwidth, blkheight + 5);
17504a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        in2 = &tmp_in[2][0];
17514a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        inpitch = 24;
17524a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    }
17534a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
17544a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    p_cur = out;
17554a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    curr_offset = 1 - outpitch * (blkheight - 1); /* offset vertically up and one pixel right */
17564a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    pkres = blkheight * inpitch; /* reuse it for limit */
17574a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
17584a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    curr_offset += 3;
17594a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
17604a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    for (j = 0; j < blkwidth; j += 4, in2 += 4)
17614a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    {
17624a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        r13 = 0;
17634a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        p_ref = in2;
17644a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        p_tmp8 = &(tmp_res[0][j]); /* intermediate result */
17654a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        p_tmp8 -= 24;  /* compensate for the first offset */
17664a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        p_cur -= outpitch;  /* compensate for the first offset */
17674a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        tmp = (uint32)(p_ref + pkres); /* limit */
17684a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        while ((uint32)p_ref < tmp)  /* the loop un-rolled  */
17694a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        {
17704a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            /* Read 1 byte at a time is too slow, too many read and pack ops, need to call CreateAlign,  */
17714a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            /*p_ref8 = p_ref-(inpitch<<1);          r0 = p_ref8[0];         r1 = p_ref8[2];
17724a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r0 |= (r1<<16);         r6 = p_ref8[1];         r1 = p_ref8[3];
17734a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r6 |= (r1<<16);         p_ref+=inpitch; */
17744a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r0 = *((uint32*)(p_ref - (inpitch << 1))); /* load 4 bytes */
17754a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            p_ref += inpitch;
17764a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r6 = (r0 >> 8) & 0xFF00FF; /* second and fourth byte */
17774a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r0 &= 0xFF00FF;
17784a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
17794a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            /*p_ref8 = p_ref+(inpitch<<1);
17804a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r1 = p_ref8[0];         r7 = p_ref8[2];         r1 |= (r7<<16);
17814a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r7 = p_ref8[1];         r2 = p_ref8[3];         r7 |= (r2<<16);*/
17824a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r1 = *((uint32*)(p_ref + (inpitch << 1)));  /* r1, r7, ref[3] */
17834a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r7 = (r1 >> 8) & 0xFF00FF;
17844a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r1 &= 0xFF00FF;
17854a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
17864a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r0 += r1;
17874a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r6 += r7;
17884a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
17894a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            /*r2 = p_ref[0];            r8 = p_ref[2];          r2 |= (r8<<16);
17904a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r8 = p_ref[1];          r1 = p_ref[3];          r8 |= (r1<<16);*/
17914a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r2 = *((uint32*)p_ref); /* r2, r8, ref[1] */
17924a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r8 = (r2 >> 8) & 0xFF00FF;
17934a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r2 &= 0xFF00FF;
17944a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
17954a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            /*p_ref8 = p_ref-inpitch;           r1 = p_ref8[0];         r7 = p_ref8[2];
17964a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r1 |= (r7<<16);         r1 += r2;           r7 = p_ref8[1];
17974a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r2 = p_ref8[3];         r7 |= (r2<<16);*/
17984a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r1 = *((uint32*)(p_ref - inpitch)); /* r1, r7, ref[0] */
17994a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r7 = (r1 >> 8) & 0xFF00FF;
18004a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r1 &= 0xFF00FF;
18014a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r1 += r2;
18024a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
18034a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r7 += r8;
18044a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
18054a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r0 += 20 * r1;
18064a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r6 += 20 * r7;
18074a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r0 += 0x100010;
18084a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r6 += 0x100010;
18094a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
18104a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            /*p_ref8 = p_ref-(inpitch<<1);          r2 = p_ref8[0];         r8 = p_ref8[2];
18114a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r2 |= (r8<<16);         r8 = p_ref8[1];         r1 = p_ref8[3];         r8 |= (r1<<16);*/
18124a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r2 = *((uint32*)(p_ref - (inpitch << 1))); /* r2, r8, ref[-1] */
18134a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r8 = (r2 >> 8) & 0xFF00FF;
18144a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r2 &= 0xFF00FF;
18154a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
18164a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            /*p_ref8 = p_ref+inpitch;           r1 = p_ref8[0];         r7 = p_ref8[2];
18174a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r1 |= (r7<<16);         r1 += r2;           r7 = p_ref8[1];
18184a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r2 = p_ref8[3];         r7 |= (r2<<16);*/
18194a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r1 = *((uint32*)(p_ref + inpitch)); /* r1, r7, ref[2] */
18204a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r7 = (r1 >> 8) & 0xFF00FF;
18214a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r1 &= 0xFF00FF;
18224a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r1 += r2;
18234a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
18244a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r7 += r8;
18254a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
18264a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r0 -= 5 * r1;
18274a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r6 -= 5 * r7;
18284a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
18294a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r0 >>= 5;
18304a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r6 >>= 5;
18314a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            /* clip */
18324a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r13 |= r6;
18334a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r13 |= r0;
18344a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            //CLIPPACK(r6,result)
18354a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            /* add with horizontal results */
18364a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r10 = *((uint32*)(p_tmp8 += 24));
18374a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r9 = (r10 >> 8) & 0xFF00FF;
18384a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r10 &= 0xFF00FF;
18394a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
18404a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r0 += r10;
18414a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r0 += 0x10001;
18424a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r0 = (r0 >> 1) & 0xFF00FF;   /* mask to 8 bytes */
18434a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
18444a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r6 += r9;
18454a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r6 += 0x10001;
18464a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r6 = (r6 >> 1) & 0xFF00FF;   /* mask to 8 bytes */
18474a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
18484a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r0 |= (r6 << 8);  /* pack it back */
18494a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            *((uint32*)(p_cur += outpitch)) = r0;
18504a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        }
18514a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        p_cur += curr_offset; /* offset to the next pixel */
18524a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        if (r13 & 0xFF000700) /* this column need clipping */
18534a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        {
18544a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            p_cur -= 4;
18554a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            for (i = 0; i < 4; i++)
18564a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            {
18574a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                p_ref = in2 + i;
18584a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                p_tmp8 = &(tmp_res[0][j+i]); /* intermediate result */
18594a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                p_tmp8 -= 24;  /* compensate for the first offset */
18604a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                p_cur -= outpitch;  /* compensate for the first offset */
18614a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                tmp = (uint32)(p_ref + pkres); /* limit */
18624a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                while ((uint32)p_ref < tmp)  /* the loop un-rolled  */
18634a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                {
18644a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    r0 = *(p_ref - (inpitch << 1));
18654a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    r1 = *(p_ref - inpitch);
18664a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    r2 = *p_ref;
18674a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    r3 = *(p_ref += inpitch);  /* modify pointer before loading */
18684a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    r4 = *(p_ref += inpitch);
18694a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    /* first pixel */
18704a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    r5 = *(p_ref += inpitch);
18714a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    result = (r0 + r5);
18724a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    r0 = (r1 + r4);
18734a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    result -= (r0 * 5);//result -= r0;  result -= (r0<<2);
18744a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    r0 = (r2 + r3);
18754a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    result += (r0 * 20);//result += (r0<<4);    result += (r0<<2);
18764a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    result = (result + 16) >> 5;
18774a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    CLIP_RESULT(result)
18784a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    tmp_result = *(p_tmp8 += 24);  /* modify pointer before loading */
18794a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    result = (result + tmp_result + 1);  /* no clip */
18804a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    result = (result >> 1);
18814a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    *(p_cur += outpitch) = result;
18824a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    /* second pixel */
18834a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    r0 = *(p_ref += inpitch);
18844a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    result = (r1 + r0);
18854a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    r1 = (r2 + r5);
18864a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    result -= (r1 * 5);//result -= r1;  result -= (r1<<2);
18874a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    r1 = (r3 + r4);
18884a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    result += (r1 * 20);//result += (r1<<4);    result += (r1<<2);
18894a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    result = (result + 16) >> 5;
18904a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    CLIP_RESULT(result)
18914a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    tmp_result = *(p_tmp8 += 24);  /* intermediate result */
18924a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    result = (result + tmp_result + 1);  /* no clip */
18934a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    result = (result >> 1);
18944a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    *(p_cur += outpitch) = result;
18954a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    /* third pixel */
18964a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    r1 = *(p_ref += inpitch);
18974a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    result = (r2 + r1);
18984a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    r2 = (r3 + r0);
18994a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    result -= (r2 * 5);//result -= r2;  result -= (r2<<2);
19004a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    r2 = (r4 + r5);
19014a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    result += (r2 * 20);//result += (r2<<4);    result += (r2<<2);
19024a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    result = (result + 16) >> 5;
19034a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    CLIP_RESULT(result)
19044a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    tmp_result = *(p_tmp8 += 24);  /* intermediate result */
19054a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    result = (result + tmp_result + 1);  /* no clip */
19064a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    result = (result >> 1);
19074a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    *(p_cur += outpitch) = result;
19084a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    /* fourth pixel */
19094a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    r2 = *(p_ref += inpitch);
19104a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    result = (r3 + r2);
19114a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    r3 = (r4 + r1);
19124a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    result -= (r3 * 5);//result -= r3;  result -= (r3<<2);
19134a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    r3 = (r5 + r0);
19144a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    result += (r3 * 20);//result += (r3<<4);    result += (r3<<2);
19154a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    result = (result + 16) >> 5;
19164a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    CLIP_RESULT(result)
19174a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    tmp_result = *(p_tmp8 += 24);  /* intermediate result */
19184a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    result = (result + tmp_result + 1);  /* no clip */
19194a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    result = (result >> 1);
19204a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    *(p_cur += outpitch) = result;
19214a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                    p_ref -= (inpitch << 1);  /* move back to center of the filter of the next one */
19224a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                }
19234a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                p_cur += (curr_offset - 3);
19244a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            }
19254a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        }
19264a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    }
19274a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
19284a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    return ;
19294a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber}
19304a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
19314a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber/* position G */
19324a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Hubervoid FullPelMC(uint8 *in, int inpitch, uint8 *out, int outpitch,
19334a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber               int blkwidth, int blkheight)
19344a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber{
19354a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    int i, j;
19364a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    int offset_in = inpitch - blkwidth;
19374a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    int offset_out = outpitch - blkwidth;
19384a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    uint32 temp;
19394a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    uint8 byte;
19404a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
19414a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    if (((uint32)in)&3)
19424a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    {
19434a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        for (j = blkheight; j > 0; j--)
19444a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        {
19454a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            for (i = blkwidth; i > 0; i -= 4)
19464a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            {
19474a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                temp = *in++;
19484a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                byte = *in++;
19494a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                temp |= (byte << 8);
19504a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                byte = *in++;
19514a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                temp |= (byte << 16);
19524a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                byte = *in++;
19534a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                temp |= (byte << 24);
19544a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
19554a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                *((uint32*)out) = temp; /* write 4 bytes */
19564a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                out += 4;
19574a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            }
19584a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            out += offset_out;
19594a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            in += offset_in;
19604a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        }
19614a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    }
19624a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    else
19634a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    {
19644a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        for (j = blkheight; j > 0; j--)
19654a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        {
19664a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            for (i = blkwidth; i > 0; i -= 4)
19674a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            {
19684a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                temp = *((uint32*)in);
19694a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                *((uint32*)out) = temp;
19704a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                in += 4;
19714a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                out += 4;
19724a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            }
19734a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            out += offset_out;
19744a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            in += offset_in;
19754a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        }
19764a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    }
19774a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    return ;
19784a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber}
19794a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
19804a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Hubervoid ChromaMotionComp(uint8 *ref, int picwidth, int picheight,
19814a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                      int x_pos, int y_pos,
19824a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                      uint8 *pred, int pred_pitch,
19834a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                      int blkwidth, int blkheight)
19844a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber{
19854a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    int dx, dy;
19864a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    int offset_dx, offset_dy;
19874a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    int index;
19884a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    uint8 temp[24][24];
19894a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
19904a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    dx = x_pos & 7;
19914a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    dy = y_pos & 7;
19924a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    offset_dx = (dx + 7) >> 3;
19934a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    offset_dy = (dy + 7) >> 3;
19944a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    x_pos = x_pos >> 3;  /* round it to full-pel resolution */
19954a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    y_pos = y_pos >> 3;
19964a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
19974a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    if ((x_pos >= 0 && x_pos + blkwidth + offset_dx <= picwidth) && (y_pos >= 0 && y_pos + blkheight + offset_dy <= picheight))
19984a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    {
19994a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        ref += y_pos * picwidth + x_pos;
20004a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    }
20014a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    else
20024a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    {
20034a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        CreatePad(ref, picwidth, picheight, x_pos, y_pos, &temp[0][0], blkwidth + offset_dx, blkheight + offset_dy);
20044a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        ref = &temp[0][0];
20054a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        picwidth = 24;
20064a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    }
20074a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
20084a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    index = offset_dx + (offset_dy << 1) + ((blkwidth << 1) & 0x7);
20094a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
20104a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    (*(ChromaMC_SIMD[index]))(ref, picwidth , dx, dy, pred, pred_pitch, blkwidth, blkheight);
20114a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    return ;
20124a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber}
20134a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
20144a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
20154a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber/* SIMD routines, unroll the loops in vertical direction, decreasing loops (things to be done)  */
20164a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Hubervoid ChromaDiagonalMC_SIMD(uint8 *pRef, int srcPitch, int dx, int dy,
20174a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                           uint8 *pOut, int predPitch, int blkwidth, int blkheight)
20184a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber{
20194a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    int32 r0, r1, r2, r3, result0, result1;
20204a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    uint8 temp[288];
20214a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    uint8 *ref, *out;
20224a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    int i, j;
20234a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    int dx_8 = 8 - dx;
20244a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    int dy_8 = 8 - dy;
20254a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
20264a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    /* horizontal first */
20274a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    out = temp;
20284a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    for (i = 0; i < blkheight + 1; i++)
20294a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    {
20304a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        ref = pRef;
20314a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        r0 = ref[0];
20324a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        for (j = 0; j < blkwidth; j += 4)
20334a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        {
20344a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r0 |= (ref[2] << 16);
20354a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            result0 = dx_8 * r0;
20364a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
20374a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r1 = ref[1] | (ref[3] << 16);
20384a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            result0 += dx * r1;
20394a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            *(int32 *)out = result0;
20404a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
20414a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            result0 = dx_8 * r1;
20424a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
20434a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r2 = ref[4];
20444a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r0 = r0 >> 16;
20454a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r1 = r0 | (r2 << 16);
20464a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            result0 += dx * r1;
20474a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            *(int32 *)(out + 16) = result0;
20484a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
20494a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            ref += 4;
20504a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            out += 4;
20514a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r0 = r2;
20524a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        }
20534a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        pRef += srcPitch;
20544a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        out += (32 - blkwidth);
20554a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    }
20564a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
20574a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber//  pRef -= srcPitch*(blkheight+1);
20584a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    ref = temp;
20594a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
20604a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    for (j = 0; j < blkwidth; j += 4)
20614a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    {
20624a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        r0 = *(int32 *)ref;
20634a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        r1 = *(int32 *)(ref + 16);
20644a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        ref += 32;
20654a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        out = pOut;
20664a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        for (i = 0; i < (blkheight >> 1); i++)
20674a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        {
20684a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            result0 = dy_8 * r0 + 0x00200020;
20694a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r2 = *(int32 *)ref;
20704a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            result0 += dy * r2;
20714a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            result0 >>= 6;
20724a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            result0 &= 0x00FF00FF;
20734a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r0 = r2;
20744a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
20754a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            result1 = dy_8 * r1 + 0x00200020;
20764a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r3 = *(int32 *)(ref + 16);
20774a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            result1 += dy * r3;
20784a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            result1 >>= 6;
20794a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            result1 &= 0x00FF00FF;
20804a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r1 = r3;
20814a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            *(int32 *)out = result0 | (result1 << 8);
20824a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            out += predPitch;
20834a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            ref += 32;
20844a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
20854a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            result0 = dy_8 * r0 + 0x00200020;
20864a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r2 = *(int32 *)ref;
20874a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            result0 += dy * r2;
20884a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            result0 >>= 6;
20894a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            result0 &= 0x00FF00FF;
20904a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r0 = r2;
20914a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
20924a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            result1 = dy_8 * r1 + 0x00200020;
20934a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r3 = *(int32 *)(ref + 16);
20944a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            result1 += dy * r3;
20954a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            result1 >>= 6;
20964a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            result1 &= 0x00FF00FF;
20974a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r1 = r3;
20984a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            *(int32 *)out = result0 | (result1 << 8);
20994a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            out += predPitch;
21004a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            ref += 32;
21014a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        }
21024a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        pOut += 4;
21034a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        ref = temp + 4; /* since it can only iterate twice max  */
21044a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    }
21054a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    return;
21064a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber}
21074a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
21084a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Hubervoid ChromaHorizontalMC_SIMD(uint8 *pRef, int srcPitch, int dx, int dy,
21094a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                             uint8 *pOut, int predPitch, int blkwidth, int blkheight)
21104a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber{
21114a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    OSCL_UNUSED_ARG(dy);
21124a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    int32 r0, r1, r2, result0, result1;
21134a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    uint8 *ref, *out;
21144a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    int i, j;
21154a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    int dx_8 = 8 - dx;
21164a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
21174a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    /* horizontal first */
21184a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    for (i = 0; i < blkheight; i++)
21194a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    {
21204a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        ref = pRef;
21214a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        out = pOut;
21224a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
21234a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        r0 = ref[0];
21244a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        for (j = 0; j < blkwidth; j += 4)
21254a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        {
21264a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r0 |= (ref[2] << 16);
21274a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            result0 = dx_8 * r0 + 0x00040004;
21284a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
21294a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r1 = ref[1] | (ref[3] << 16);
21304a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            result0 += dx * r1;
21314a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            result0 >>= 3;
21324a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            result0 &= 0x00FF00FF;
21334a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
21344a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            result1 = dx_8 * r1 + 0x00040004;
21354a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
21364a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r2 = ref[4];
21374a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r0 = r0 >> 16;
21384a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r1 = r0 | (r2 << 16);
21394a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            result1 += dx * r1;
21404a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            result1 >>= 3;
21414a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            result1 &= 0x00FF00FF;
21424a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
21434a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            *(int32 *)out = result0 | (result1 << 8);
21444a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
21454a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            ref += 4;
21464a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            out += 4;
21474a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r0 = r2;
21484a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        }
21494a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
21504a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        pRef += srcPitch;
21514a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        pOut += predPitch;
21524a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    }
21534a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    return;
21544a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber}
21554a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
21564a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Hubervoid ChromaVerticalMC_SIMD(uint8 *pRef, int srcPitch, int dx, int dy,
21574a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                           uint8 *pOut, int predPitch, int blkwidth, int blkheight)
21584a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber{
21594a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    OSCL_UNUSED_ARG(dx);
21604a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    int32 r0, r1, r2, r3, result0, result1;
21614a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    int i, j;
21624a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    uint8 *ref, *out;
21634a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    int dy_8 = 8 - dy;
21644a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    /* vertical first */
21654a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    for (i = 0; i < blkwidth; i += 4)
21664a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    {
21674a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        ref = pRef;
21684a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        out = pOut;
21694a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
21704a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        r0 = ref[0] | (ref[2] << 16);
21714a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        r1 = ref[1] | (ref[3] << 16);
21724a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        ref += srcPitch;
21734a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        for (j = 0; j < blkheight; j++)
21744a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        {
21754a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            result0 = dy_8 * r0 + 0x00040004;
21764a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r2 = ref[0] | (ref[2] << 16);
21774a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            result0 += dy * r2;
21784a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            result0 >>= 3;
21794a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            result0 &= 0x00FF00FF;
21804a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r0 = r2;
21814a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
21824a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            result1 = dy_8 * r1 + 0x00040004;
21834a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r3 = ref[1] | (ref[3] << 16);
21844a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            result1 += dy * r3;
21854a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            result1 >>= 3;
21864a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            result1 &= 0x00FF00FF;
21874a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            r1 = r3;
21884a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            *(int32 *)out = result0 | (result1 << 8);
21894a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            ref += srcPitch;
21904a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            out += predPitch;
21914a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        }
21924a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        pOut += 4;
21934a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        pRef += 4;
21944a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    }
21954a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    return;
21964a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber}
21974a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
21984a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Hubervoid ChromaDiagonalMC2_SIMD(uint8 *pRef, int srcPitch, int dx, int dy,
21994a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                            uint8 *pOut,  int predPitch, int blkwidth, int blkheight)
22004a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber{
22014a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    OSCL_UNUSED_ARG(blkwidth);
22024a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    int32 r0, r1, temp0, temp1, result;
22034a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    int32 temp[9];
22044a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    int32 *out;
22054a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    int i, r_temp;
22064a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    int dy_8 = 8 - dy;
22074a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
22084a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    /* horizontal first */
22094a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    out = temp;
22104a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    for (i = 0; i < blkheight + 1; i++)
22114a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    {
22124a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        r_temp = pRef[1];
22134a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        temp0 = (pRef[0] << 3) + dx * (r_temp - pRef[0]);
22144a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        temp1 = (r_temp << 3) + dx * (pRef[2] - r_temp);
22154a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        r0 = temp0 | (temp1 << 16);
22164a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        *out++ = r0;
22174a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        pRef += srcPitch;
22184a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    }
22194a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
22204a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    pRef -= srcPitch * (blkheight + 1);
22214a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
22224a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    out = temp;
22234a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
22244a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    r0 = *out++;
22254a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
22264a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    for (i = 0; i < blkheight; i++)
22274a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    {
22284a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        result = dy_8 * r0 + 0x00200020;
22294a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        r1 = *out++;
22304a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        result += dy * r1;
22314a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        result >>= 6;
22324a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        result &= 0x00FF00FF;
22334a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        *(int16 *)pOut = (result >> 8) | (result & 0xFF);
22344a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        r0 = r1;
22354a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        pOut += predPitch;
22364a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    }
22374a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    return;
22384a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber}
22394a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
22404a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Hubervoid ChromaHorizontalMC2_SIMD(uint8 *pRef, int srcPitch, int dx, int dy,
22414a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                              uint8 *pOut, int predPitch, int blkwidth, int blkheight)
22424a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber{
22434a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    OSCL_UNUSED_ARG(dy);
22444a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    OSCL_UNUSED_ARG(blkwidth);
22454a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    int i, temp, temp0, temp1;
22464a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
22474a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    /* horizontal first */
22484a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    for (i = 0; i < blkheight; i++)
22494a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    {
22504a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        temp = pRef[1];
22514a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        temp0 = ((pRef[0] << 3) + dx * (temp - pRef[0]) + 4) >> 3;
22524a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        temp1 = ((temp << 3) + dx * (pRef[2] - temp) + 4) >> 3;
22534a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
22544a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        *(int16 *)pOut = temp0 | (temp1 << 8);
22554a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        pRef += srcPitch;
22564a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        pOut += predPitch;
22574a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
22584a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    }
22594a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    return;
22604a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber}
22614a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Hubervoid ChromaVerticalMC2_SIMD(uint8 *pRef, int srcPitch, int dx, int dy,
22624a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                            uint8 *pOut, int predPitch, int blkwidth, int blkheight)
22634a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber{
22644a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    OSCL_UNUSED_ARG(dx);
22654a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    OSCL_UNUSED_ARG(blkwidth);
22664a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    int32 r0, r1, result;
22674a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    int i;
22684a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    int dy_8 = 8 - dy;
22694a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    r0 = pRef[0] | (pRef[1] << 16);
22704a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    pRef += srcPitch;
22714a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    for (i = 0; i < blkheight; i++)
22724a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    {
22734a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        result = dy_8 * r0 + 0x00040004;
22744a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        r1 = pRef[0] | (pRef[1] << 16);
22754a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        result += dy * r1;
22764a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        result >>= 3;
22774a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        result &= 0x00FF00FF;
22784a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        *(int16 *)pOut = (result >> 8) | (result & 0xFF);
22794a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        r0 = r1;
22804a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        pRef += srcPitch;
22814a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        pOut += predPitch;
22824a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    }
22834a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    return;
22844a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber}
22854a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
22864a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Hubervoid ChromaFullMC_SIMD(uint8 *pRef, int srcPitch, int dx, int dy,
22874a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                       uint8 *pOut, int predPitch, int blkwidth, int blkheight)
22884a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber{
22894a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    OSCL_UNUSED_ARG(dx);
22904a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    OSCL_UNUSED_ARG(dy);
22914a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    int i, j;
22924a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    int offset_in = srcPitch - blkwidth;
22934a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    int offset_out = predPitch - blkwidth;
22944a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    uint16 temp;
22954a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    uint8 byte;
22964a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber
22974a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    if (((uint32)pRef)&1)
22984a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    {
22994a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        for (j = blkheight; j > 0; j--)
23004a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        {
23014a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            for (i = blkwidth; i > 0; i -= 2)
23024a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            {
23034a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                temp = *pRef++;
23044a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                byte = *pRef++;
23054a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                temp |= (byte << 8);
23064a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                *((uint16*)pOut) = temp; /* write 2 bytes */
23074a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                pOut += 2;
23084a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            }
23094a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            pOut += offset_out;
23104a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            pRef += offset_in;
23114a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        }
23124a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    }
23134a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    else
23144a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    {
23154a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        for (j = blkheight; j > 0; j--)
23164a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        {
23174a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            for (i = blkwidth; i > 0; i -= 2)
23184a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            {
23194a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                temp = *((uint16*)pRef);
23204a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                *((uint16*)pOut) = temp;
23214a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                pRef += 2;
23224a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber                pOut += 2;
23234a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            }
23244a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            pOut += offset_out;
23254a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber            pRef += offset_in;
23264a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber        }
23274a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    }
23284a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber    return ;
23294a0ec3fda9c0e8e74b36e4e201b65ced80263b1fAndreas Huber}
2330