159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong/* ------------------------------------------------------------------
259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong * Copyright (C) 1998-2009 PacketVideo
359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong *
459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong * Licensed under the Apache License, Version 2.0 (the "License");
559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong * you may not use this file except in compliance with the License.
659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong * You may obtain a copy of the License at
759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong *
859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong *      http://www.apache.org/licenses/LICENSE-2.0
959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong *
1059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong * Unless required by applicable law or agreed to in writing, software
1159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong * distributed under the License is distributed on an "AS IS" BASIS,
1259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
1359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong * express or implied.
1459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong * See the License for the specific language governing permissions
1559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong * and limitations under the License.
1659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong * -------------------------------------------------------------------
1759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong */
1859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#include "mp4def.h"
1959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#include "mp4enc_lib.h"
2059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#include "mp4lib_int.h"
2159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong#include "m4venc_oscl.h"
2259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
/*
 * Offsets into a frame buffer.  Both offset macros expand to an expression
 * that reads a variable named `lx`, so one must be in scope wherever they
 * are used.
 * NOTE(review): presumably lx is the row pitch and the constants skip a
 * 16-pixel (luma) / 8-pixel (chroma) border -- confirm against the code
 * that allocates the frame buffers.
 */
#define VOP_OFFSET  ((lx<<4)+16)  /* for offset to image area */
#define CVOP_OFFSET ((lx<<2)+8)

/* Subtracted from the best INTER SAD in ChooseMode_C before it is compared
   with the INTRA activity measure. */
#define PREF_INTRA  512     /* bias for INTRA coding */
2759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
2859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong/*===============================================================
2959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    Function:   ChooseMode
3059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    Date:       09/21/2000
3159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    Purpose:    Choosing between INTRA or INTER
3259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    Input/Output: Pointer to the starting point of the macroblock.
3359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    Note:
3459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong===============================================================*/
3559f566c4ec3dfc097ad8163523e522280b27e5c3James Dongvoid ChooseMode_C(UChar *Mode, UChar *cur, Int lx, Int min_SAD)
3659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong{
3759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    Int i, j;
3859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    Int MB_mean, A, tmp, Th;
3959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    Int offset = (lx >> 2) - 4;
4059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    UChar *p = cur;
4159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    Int *pint = (Int *) cur, temp = 0;
4259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    MB_mean = 0;
4359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    A = 0;
4459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    Th = (min_SAD - PREF_INTRA) >> 1;
4559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
4659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    for (j = 0; j < 8; j++)
4759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    {
4859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
4959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        /* Odd Rows */
5059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        temp += (*pint++) & 0x00FF00FF;
5159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        temp += (*pint++) & 0x00FF00FF;
5259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        temp += (*pint++) & 0x00FF00FF;
5359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        temp += (*pint++) & 0x00FF00FF;
5459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        pint += offset;
5559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
5659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        /* Even Rows */
5759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        temp += (*pint++ >> 8) & 0x00FF00FF;
5859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        temp += (*pint++ >> 8) & 0x00FF00FF;
5959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        temp += (*pint++ >> 8) & 0x00FF00FF;
6059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        temp += (*pint++ >> 8) & 0x00FF00FF;
6159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        pint += offset;
6259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
6359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    }
6459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
6559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    MB_mean = (((temp & 0x0000FFFF)) + ((temp & 0xFFFF0000) >> 16)) >> 7;
6659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
6759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    p = cur;
6859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    offset = lx - 16;
6959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    for (j = 0; j < 16; j++)
7059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    {
7159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        temp = (j & 1);
7259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        p += temp;
7359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        i = 8;
7459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        while (i--)
7559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        {
7659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong            tmp = *p - MB_mean;
7759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong            p += 2;
7859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong            if (tmp > 0) A += tmp;
7959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong            else    A -= tmp;
8059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        }
8159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
8259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        if (A >= Th)
8359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        {
8459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong            *Mode = MODE_INTER;
8559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong            return ;
8659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        }
8759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        p += (offset - temp);
8859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    }
8959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
9059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    if (A < Th)
9159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        *Mode = MODE_INTRA;
9259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    else
9359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        *Mode = MODE_INTER;
9459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
9559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    return ;
9659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong}
9759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
9859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
9959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong/*===============================================================
10059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    Function:   GetHalfPelMBRegion
10159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    Date:       09/17/2000
10259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    Purpose:    Interpolate the search region for half-pel search
10359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    Input/Output:   Center of the search, Half-pel memory, width
10459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    Note:       rounding type should be parameterized.
10559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong                Now fixed it to zero!!!!!!
10659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
10759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong===============================================================*/
10859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
10959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
11059f566c4ec3dfc097ad8163523e522280b27e5c3James Dongvoid GetHalfPelMBRegion_C(UChar *cand, UChar *hmem, Int lx)
11159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong{
11259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    Int i, j;
11359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    UChar *p1, *p2, *p3, *p4;
11459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    UChar *hmem1 = hmem;
11559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    UChar *hmem2 = hmem1 + 33;
11659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    Int offset = lx - 17;
11759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
11859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    p1 = cand - lx - 1;
11959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    p2 = cand - lx;
12059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    p3 = cand - 1;
12159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    p4 = cand;
12259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
12359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    for (j = 0; j < 16; j++)
12459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    {
12559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        for (i = 0; i < 16; i++)
12659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        {
12759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong            *hmem1++ = ((*p1++) + *p2 + *p3 + *p4 + 2) >> 2;
12859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong            *hmem1++ = ((*p2++) + *p4 + 1) >> 1;
12959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong            *hmem2++ = ((*p3++) + *p4 + 1) >> 1;
13059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong            *hmem2++ = *p4++;
13159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        }
13259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        /*  last pixel */
13359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        *hmem1++ = ((*p1++) + (*p2++) + *p3 + *p4 + 2) >> 2;
13459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        *hmem2++ = ((*p3++) + (*p4++) + 1) >> 1;
13559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        hmem1 += 33;
13659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        hmem2 += 33;
13759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        p1 += offset;
13859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        p2 += offset;
13959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        p3 += offset;
14059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        p4 += offset;
14159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    }
14259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    /* last row */
14359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    for (i = 0; i < 16; i++)
14459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    {
14559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        *hmem1++ = ((*p1++) + *p2 + (*p3++) + *p4 + 2) >> 2;
14659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        *hmem1++ = ((*p2++) + (*p4++) + 1) >> 1;
14759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
14859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    }
14959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    *hmem1 = (*p1 + *p2 + *p3 + *p4 + 2) >> 2;
15059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
15159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    return ;
15259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong}
15359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
15459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong/*===============================================================
15559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong   Function:    GetHalfPelBlkRegion
15659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong   Date:        09/20/2000
15759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong   Purpose: Interpolate the search region for half-pel search
15859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong            in 4MV mode.
15959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong   Input/Output:    Center of the search, Half-pel memory, width
16059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong   Note:        rounding type should be parameterized.
16159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong            Now fixed it to zero!!!!!!
16259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
16359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong===============================================================*/
16459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
16559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
16659f566c4ec3dfc097ad8163523e522280b27e5c3James Dongvoid GetHalfPelBlkRegion(UChar *cand, UChar *hmem, Int lx)
16759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong{
16859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    Int i, j;
16959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    UChar *p1, *p2, *p3, *p4;
17059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    UChar *hmem1 = hmem;
17159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    UChar *hmem2 = hmem1 + 17;
17259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    Int offset = lx - 9;
17359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
17459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    p1 = cand - lx - 1;
17559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    p2 = cand - lx;
17659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    p3 = cand - 1;
17759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    p4 = cand;
17859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
17959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    for (j = 0; j < 8; j++)
18059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    {
18159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        for (i = 0; i < 8; i++)
18259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        {
18359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong            *hmem1++ = ((*p1++) + *p2 + *p3 + *p4 + 2) >> 2;
18459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong            *hmem1++ = ((*p2++) + *p4 + 1) >> 1;
18559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong            *hmem2++ = ((*p3++) + *p4 + 1) >> 1;
18659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong            *hmem2++ = *p4++;
18759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        }
18859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        /*  last pixel */
18959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        *hmem1++ = ((*p1++) + (*p2++) + *p3 + *p4 + 2) >> 2;
19059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        *hmem2++ = ((*p3++) + (*p4++) + 1) >> 1;
19159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        hmem1 += 17;
19259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        hmem2 += 17;
19359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        p1 += offset;
19459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        p2 += offset;
19559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        p3 += offset;
19659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        p4 += offset;
19759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    }
19859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    /* last row */
19959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    for (i = 0; i < 8; i++)
20059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    {
20159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        *hmem1++ = ((*p1++) + *p2 + (*p3++) + *p4 + 2) >> 2;
20259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        *hmem1++ = ((*p2++) + (*p4++) + 1) >> 1;
20359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
20459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    }
20559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    *hmem1 = (*p1 + *p2 + *p3 + *p4 + 2) >> 2;
20659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
20759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    return ;
20859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong}
20959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
21059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
21159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong/*=====================================================================
21259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    Function:   PaddingEdge
21359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    Date:       09/16/2000
21459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    Purpose:    Pad edge of a Vop
21559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    Modification: 09/20/05.
21659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong=====================================================================*/
21759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
21859f566c4ec3dfc097ad8163523e522280b27e5c3James Dongvoid  PaddingEdge(Vop *refVop)
21959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong{
22059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    UChar *src, *dst;
22159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    Int i;
22259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    Int pitch, width, height;
22359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    ULong temp1, temp2;
22459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
22559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    width = refVop->width;
22659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    height = refVop->height;
22759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    pitch = refVop->pitch;
22859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
22959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    /* pad top */
23059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    src = refVop->yChan;
23159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
23259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    temp1 = *src; /* top-left corner */
23359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    temp2 = src[width-1]; /* top-right corner */
23459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    temp1 |= (temp1 << 8);
23559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    temp1 |= (temp1 << 16);
23659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    temp2 |= (temp2 << 8);
23759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    temp2 |= (temp2 << 16);
23859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
23959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    dst = src - (pitch << 4);
24059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
24159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    *((ULong*)(dst - 16)) = temp1;
24259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    *((ULong*)(dst - 12)) = temp1;
24359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    *((ULong*)(dst - 8)) = temp1;
24459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    *((ULong*)(dst - 4)) = temp1;
24559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
24659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    M4VENC_MEMCPY(dst, src, width);
24759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
24859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    *((ULong*)(dst += width)) = temp2;
24959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    *((ULong*)(dst + 4)) = temp2;
25059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    *((ULong*)(dst + 8)) = temp2;
25159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    *((ULong*)(dst + 12)) = temp2;
25259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
25359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    dst = dst - width - 16;
25459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
25559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    i = 15;
25659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    while (i--)
25759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    {
25859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        M4VENC_MEMCPY(dst + pitch, dst, pitch);
25959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst += pitch;
26059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    }
26159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
26259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    /* pad sides */
26359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    dst += (pitch + 16);
26459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    src = dst;
26559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    i = height;
26659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    while (i--)
26759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    {
26859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        temp1 = *src;
26959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        temp2 = src[width-1];
27059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        temp1 |= (temp1 << 8);
27159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        temp1 |= (temp1 << 16);
27259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        temp2 |= (temp2 << 8);
27359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        temp2 |= (temp2 << 16);
27459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
27559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        *((ULong*)(dst - 16)) = temp1;
27659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        *((ULong*)(dst - 12)) = temp1;
27759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        *((ULong*)(dst - 8)) = temp1;
27859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        *((ULong*)(dst - 4)) = temp1;
27959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
28059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        *((ULong*)(dst += width)) = temp2;
28159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        *((ULong*)(dst + 4)) = temp2;
28259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        *((ULong*)(dst + 8)) = temp2;
28359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        *((ULong*)(dst + 12)) = temp2;
28459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
28559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        src += pitch;
28659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst = src;
28759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    }
28859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
28959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    /* pad bottom */
29059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    dst -= 16;
29159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    i = 16;
29259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    while (i--)
29359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    {
29459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        M4VENC_MEMCPY(dst, dst - pitch, pitch);
29559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        dst += pitch;
29659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    }
29759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
29859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
29959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    return ;
30059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong}
30159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
30259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong/*===================================================================
30359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    Function:   ComputeMBSum
30459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    Date:       10/28/2000
30559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    Purpose:    Compute sum of absolute value (SAV) of blocks in a macroblock
30659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong                in INTRA mode needed for rate control. Thus, instead of
30759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong                computing the SAV, we can compute first order moment or
30859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong                variance .
30959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
31059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    11/28/00:    add MMX
31159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    9/3/01:      do parallel comp for C function.
31259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong===================================================================*/
31359f566c4ec3dfc097ad8163523e522280b27e5c3James Dongvoid ComputeMBSum_C(UChar *cur, Int lx, MOT *mot_mb)
31459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong{
31559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    Int j;
31659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    Int *cInt, *cInt2;
31759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    Int sad1 = 0, sad2 = 0, sad3 = 0, sad4 = 0;
31859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    Int tmp, tmp2, mask = 0x00FF00FF;
31959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
32059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    cInt = (Int*)cur;   /* make sure this is word-align */
32159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    cInt2 = (Int*)(cur + (lx << 3));
32259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    j = 8;
32359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    while (j--)
32459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    {
32559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        tmp = cInt[3];  /* load 4 pixels at a time */
32659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        tmp2 = tmp & mask;
32759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        tmp = (tmp >> 8) & mask;
32859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        tmp += tmp2;
32959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        sad2 += tmp;
33059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        tmp = cInt[2];
33159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        tmp2 = tmp & mask;
33259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        tmp = (tmp >> 8) & mask;
33359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        tmp += tmp2;
33459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        sad2 += tmp;
33559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        tmp = cInt[1];
33659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        tmp2 = tmp & mask;
33759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        tmp = (tmp >> 8) & mask;
33859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        tmp += tmp2;
33959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        sad1 += tmp;
34059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        tmp = *cInt;
34159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        cInt += (lx >> 2);
34259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        tmp2 = tmp & mask;
34359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        tmp = (tmp >> 8) & mask;
34459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        tmp += tmp2;
34559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        sad1 += tmp;
34659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
34759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        tmp = cInt2[3];
34859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        tmp2 = tmp & mask;
34959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        tmp = (tmp >> 8) & mask;
35059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        tmp += tmp2;
35159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        sad4 += tmp;
35259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        tmp = cInt2[2];
35359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        tmp2 = tmp & mask;
35459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        tmp = (tmp >> 8) & mask;
35559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        tmp += tmp2;
35659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        sad4 += tmp;
35759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        tmp = cInt2[1];
35859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        tmp2 = tmp & mask;
35959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        tmp = (tmp >> 8) & mask;
36059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        tmp += tmp2;
36159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        sad3 += tmp;
36259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        tmp = *cInt2;
36359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        cInt2 += (lx >> 2);
36459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        tmp2 = tmp & mask;
36559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        tmp = (tmp >> 8) & mask;
36659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        tmp += tmp2;
36759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong        sad3 += tmp;
36859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    }
36959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    sad1 += (sad1 << 16);
37059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    sad2 += (sad2 << 16);
37159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    sad3 += (sad3 << 16);
37259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    sad4 += (sad4 << 16);
37359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    sad1 >>= 16;
37459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    sad2 >>= 16;
37559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    sad3 >>= 16;
37659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    sad4 >>= 16;
37759f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
37859f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    mot_mb[1].sad = sad1;
37959f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    mot_mb[2].sad = sad2;
38059f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    mot_mb[3].sad = sad3;
38159f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    mot_mb[4].sad = sad4;
38259f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    mot_mb[0].sad = sad1 + sad2 + sad3 + sad4;
38359f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
38459f566c4ec3dfc097ad8163523e522280b27e5c3James Dong    return ;
38559f566c4ec3dfc097ad8163523e522280b27e5c3James Dong}
38659f566c4ec3dfc097ad8163523e522280b27e5c3James Dong
387