129a84457aed4c45bc900998b5e11c03023264208James Dong/* ------------------------------------------------------------------
229a84457aed4c45bc900998b5e11c03023264208James Dong * Copyright (C) 1998-2009 PacketVideo
329a84457aed4c45bc900998b5e11c03023264208James Dong *
429a84457aed4c45bc900998b5e11c03023264208James Dong * Licensed under the Apache License, Version 2.0 (the "License");
529a84457aed4c45bc900998b5e11c03023264208James Dong * you may not use this file except in compliance with the License.
629a84457aed4c45bc900998b5e11c03023264208James Dong * You may obtain a copy of the License at
729a84457aed4c45bc900998b5e11c03023264208James Dong *
829a84457aed4c45bc900998b5e11c03023264208James Dong *      http://www.apache.org/licenses/LICENSE-2.0
929a84457aed4c45bc900998b5e11c03023264208James Dong *
1029a84457aed4c45bc900998b5e11c03023264208James Dong * Unless required by applicable law or agreed to in writing, software
1129a84457aed4c45bc900998b5e11c03023264208James Dong * distributed under the License is distributed on an "AS IS" BASIS,
1229a84457aed4c45bc900998b5e11c03023264208James Dong * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
1329a84457aed4c45bc900998b5e11c03023264208James Dong * express or implied.
1429a84457aed4c45bc900998b5e11c03023264208James Dong * See the License for the specific language governing permissions
1529a84457aed4c45bc900998b5e11c03023264208James Dong * and limitations under the License.
1629a84457aed4c45bc900998b5e11c03023264208James Dong * -------------------------------------------------------------------
1729a84457aed4c45bc900998b5e11c03023264208James Dong */
1829a84457aed4c45bc900998b5e11c03023264208James Dong#include "avcenc_lib.h"
/* 3/29/01 fast half-pel search based on neighboring guess */
/* Half-pel distance threshold.
   Value ranging from 0 to 4, high complexity (more accurate) to
   low complexity (less accurate).
   NOTE(review): the current setting (5) lies outside the documented 0..4
   range; the previous setting was 2. */
#define HP_DISTANCE_TH      5

/* 1MV bias versus 4MVs */
#define PREF_16_VEC 129
2529a84457aed4c45bc900998b5e11c03023264208James Dong
/* Lookup table indexed as distance_tab[hp_guess][k].
 * Row 0 is 0 for k == 0 and 1 for every other k; rows 1..8 hold values
 * in the range 0..4.
 * NOTE(review): presumably the distance between a guessed half-pel
 * position and candidate k in the spiral ordering used by the search
 * below -- confirm against the code that reads this table. */
static const int distance_tab[9][9] =
{
    {0, 1, 1, 1, 1, 1, 1, 1, 1},
    {1, 0, 1, 2, 3, 4, 3, 2, 1},
    {1, 0, 0, 0, 1, 2, 3, 2, 1},
    {1, 2, 1, 0, 1, 2, 3, 4, 3},
    {1, 2, 1, 0, 0, 0, 1, 2, 3},
    {1, 4, 3, 2, 1, 0, 1, 2, 3},
    {1, 2, 3, 2, 1, 0, 0, 0, 1},
    {1, 2, 3, 4, 3, 2, 1, 0, 1},
    {1, 0, 1, 2, 3, 2, 1, 0, 0}
};
3829a84457aed4c45bc900998b5e11c03023264208James Dong
/* Saturate the signed value held in lvalue x to the range 0..255, in place.
 * The unsigned compare catches both negative values and values above 255;
 * (x >> 31) is then all-ones for a negative x (giving 0) and zero otherwise
 * (giving 0xFF). This relies on a 32-bit x and an arithmetic right shift.
 * The macro expands to a bare `if` statement and call sites in this file
 * invoke it without a trailing semicolon, so the expansion shape is kept
 * as-is; only the argument is parenthesized. */
#define CLIP_RESULT(x)      if ((uint)(x) > 0xFF) { \
                 (x) = 0xFF & (~((x) >> 31)); }
4129a84457aed4c45bc900998b5e11c03023264208James Dong
/* Scale and saturate lvalue x, in place, into bits 16..23.
 *  - x negative           -> 0
 *  - x >= 0x20000000      -> 0xFF0000
 *  - otherwise            -> (x >> 5) & 0xFF0000
 * The unsigned compare folds the "negative" and "too large" cases together;
 * (x >> 31) then selects between them (relies on a 32-bit x and an
 * arithmetic right shift). The macro expands to a bare if/else statement;
 * that shape is preserved for existing call sites and only the argument is
 * parenthesized. */
#define CLIP_UPPER16(x)     if ((uint)(x) >= 0x20000000) { \
        (x) = 0xFF0000 & (~((x) >> 31)); } \
        else { \
        (x) = ((x) >> 5) & 0xFF0000; \
        }
4729a84457aed4c45bc900998b5e11c03023264208James Dong
4829a84457aed4c45bc900998b5e11c03023264208James Dong/*=====================================================================
4929a84457aed4c45bc900998b5e11c03023264208James Dong    Function:   AVCFindHalfPelMB
5029a84457aed4c45bc900998b5e11c03023264208James Dong    Date:       10/31/2007
5129a84457aed4c45bc900998b5e11c03023264208James Dong    Purpose:    Find half pel resolution MV surrounding the full-pel MV
5229a84457aed4c45bc900998b5e11c03023264208James Dong=====================================================================*/
5329a84457aed4c45bc900998b5e11c03023264208James Dong
5429a84457aed4c45bc900998b5e11c03023264208James Dongint AVCFindHalfPelMB(AVCEncObject *encvid, uint8 *cur, AVCMV *mot, uint8 *ncand,
5529a84457aed4c45bc900998b5e11c03023264208James Dong                     int xpos, int ypos, int hp_guess, int cmvx, int cmvy)
5629a84457aed4c45bc900998b5e11c03023264208James Dong{
5729a84457aed4c45bc900998b5e11c03023264208James Dong    AVCPictureData *currPic = encvid->common->currPic;
5829a84457aed4c45bc900998b5e11c03023264208James Dong    int lx = currPic->pitch;
5929a84457aed4c45bc900998b5e11c03023264208James Dong    int d, dmin, satd_min;
6029a84457aed4c45bc900998b5e11c03023264208James Dong    uint8* cand;
6129a84457aed4c45bc900998b5e11c03023264208James Dong    int lambda_motion = encvid->lambda_motion;
6229a84457aed4c45bc900998b5e11c03023264208James Dong    uint8 *mvbits = encvid->mvbits;
6329a84457aed4c45bc900998b5e11c03023264208James Dong    int mvcost;
6429a84457aed4c45bc900998b5e11c03023264208James Dong    /* list of candidate to go through for half-pel search*/
6529a84457aed4c45bc900998b5e11c03023264208James Dong    uint8 *subpel_pred = (uint8*) encvid->subpel_pred; // all 16 sub-pel positions
6629a84457aed4c45bc900998b5e11c03023264208James Dong    uint8 **hpel_cand = (uint8**) encvid->hpel_cand; /* half-pel position */
6729a84457aed4c45bc900998b5e11c03023264208James Dong
6829a84457aed4c45bc900998b5e11c03023264208James Dong    int xh[9] = {0, 0, 2, 2, 2, 0, -2, -2, -2};
6929a84457aed4c45bc900998b5e11c03023264208James Dong    int yh[9] = {0, -2, -2, 0, 2, 2, 2, 0, -2};
7029a84457aed4c45bc900998b5e11c03023264208James Dong    int xq[8] = {0, 1, 1, 1, 0, -1, -1, -1};
7129a84457aed4c45bc900998b5e11c03023264208James Dong    int yq[8] = { -1, -1, 0, 1, 1, 1, 0, -1};
7229a84457aed4c45bc900998b5e11c03023264208James Dong    int h, hmin, q, qmin;
7329a84457aed4c45bc900998b5e11c03023264208James Dong
7429a84457aed4c45bc900998b5e11c03023264208James Dong    OSCL_UNUSED_ARG(xpos);
7529a84457aed4c45bc900998b5e11c03023264208James Dong    OSCL_UNUSED_ARG(ypos);
7629a84457aed4c45bc900998b5e11c03023264208James Dong    OSCL_UNUSED_ARG(hp_guess);
7729a84457aed4c45bc900998b5e11c03023264208James Dong
7829a84457aed4c45bc900998b5e11c03023264208James Dong    GenerateHalfPelPred(subpel_pred, ncand, lx);
7929a84457aed4c45bc900998b5e11c03023264208James Dong
8029a84457aed4c45bc900998b5e11c03023264208James Dong    cur = encvid->currYMB; // pre-load current original MB
8129a84457aed4c45bc900998b5e11c03023264208James Dong
8229a84457aed4c45bc900998b5e11c03023264208James Dong    cand = hpel_cand[0];
8329a84457aed4c45bc900998b5e11c03023264208James Dong
8429a84457aed4c45bc900998b5e11c03023264208James Dong    // find cost for the current full-pel position
8529a84457aed4c45bc900998b5e11c03023264208James Dong    dmin = SATD_MB(cand, cur, 65535); // get Hadamaard transform SAD
8629a84457aed4c45bc900998b5e11c03023264208James Dong    mvcost = MV_COST_S(lambda_motion, mot->x, mot->y, cmvx, cmvy);
8729a84457aed4c45bc900998b5e11c03023264208James Dong    satd_min = dmin;
8829a84457aed4c45bc900998b5e11c03023264208James Dong    dmin += mvcost;
8929a84457aed4c45bc900998b5e11c03023264208James Dong    hmin = 0;
9029a84457aed4c45bc900998b5e11c03023264208James Dong
9129a84457aed4c45bc900998b5e11c03023264208James Dong    /* find half-pel */
9229a84457aed4c45bc900998b5e11c03023264208James Dong    for (h = 1; h < 9; h++)
9329a84457aed4c45bc900998b5e11c03023264208James Dong    {
9429a84457aed4c45bc900998b5e11c03023264208James Dong        d = SATD_MB(hpel_cand[h], cur, dmin);
9529a84457aed4c45bc900998b5e11c03023264208James Dong        mvcost = MV_COST_S(lambda_motion, mot->x + xh[h], mot->y + yh[h], cmvx, cmvy);
9629a84457aed4c45bc900998b5e11c03023264208James Dong        d += mvcost;
9729a84457aed4c45bc900998b5e11c03023264208James Dong
9829a84457aed4c45bc900998b5e11c03023264208James Dong        if (d < dmin)
9929a84457aed4c45bc900998b5e11c03023264208James Dong        {
10029a84457aed4c45bc900998b5e11c03023264208James Dong            dmin = d;
10129a84457aed4c45bc900998b5e11c03023264208James Dong            hmin = h;
10229a84457aed4c45bc900998b5e11c03023264208James Dong            satd_min = d - mvcost;
10329a84457aed4c45bc900998b5e11c03023264208James Dong        }
10429a84457aed4c45bc900998b5e11c03023264208James Dong    }
10529a84457aed4c45bc900998b5e11c03023264208James Dong
10629a84457aed4c45bc900998b5e11c03023264208James Dong    mot->sad = dmin;
10729a84457aed4c45bc900998b5e11c03023264208James Dong    mot->x += xh[hmin];
10829a84457aed4c45bc900998b5e11c03023264208James Dong    mot->y += yh[hmin];
10929a84457aed4c45bc900998b5e11c03023264208James Dong    encvid->best_hpel_pos = hmin;
11029a84457aed4c45bc900998b5e11c03023264208James Dong
11129a84457aed4c45bc900998b5e11c03023264208James Dong    /*** search for quarter-pel ****/
11229a84457aed4c45bc900998b5e11c03023264208James Dong    GenerateQuartPelPred(encvid->bilin_base[hmin], &(encvid->qpel_cand[0][0]), hmin);
11329a84457aed4c45bc900998b5e11c03023264208James Dong
11429a84457aed4c45bc900998b5e11c03023264208James Dong    encvid->best_qpel_pos = qmin = -1;
11529a84457aed4c45bc900998b5e11c03023264208James Dong
11629a84457aed4c45bc900998b5e11c03023264208James Dong    for (q = 0; q < 8; q++)
11729a84457aed4c45bc900998b5e11c03023264208James Dong    {
11829a84457aed4c45bc900998b5e11c03023264208James Dong        d = SATD_MB(encvid->qpel_cand[q], cur, dmin);
11929a84457aed4c45bc900998b5e11c03023264208James Dong        mvcost = MV_COST_S(lambda_motion, mot->x + xq[q], mot->y + yq[q], cmvx, cmvy);
12029a84457aed4c45bc900998b5e11c03023264208James Dong        d += mvcost;
12129a84457aed4c45bc900998b5e11c03023264208James Dong        if (d < dmin)
12229a84457aed4c45bc900998b5e11c03023264208James Dong        {
12329a84457aed4c45bc900998b5e11c03023264208James Dong            dmin = d;
12429a84457aed4c45bc900998b5e11c03023264208James Dong            qmin = q;
12529a84457aed4c45bc900998b5e11c03023264208James Dong            satd_min = d - mvcost;
12629a84457aed4c45bc900998b5e11c03023264208James Dong        }
12729a84457aed4c45bc900998b5e11c03023264208James Dong    }
12829a84457aed4c45bc900998b5e11c03023264208James Dong
12929a84457aed4c45bc900998b5e11c03023264208James Dong    if (qmin != -1)
13029a84457aed4c45bc900998b5e11c03023264208James Dong    {
13129a84457aed4c45bc900998b5e11c03023264208James Dong        mot->sad = dmin;
13229a84457aed4c45bc900998b5e11c03023264208James Dong        mot->x += xq[qmin];
13329a84457aed4c45bc900998b5e11c03023264208James Dong        mot->y += yq[qmin];
13429a84457aed4c45bc900998b5e11c03023264208James Dong        encvid->best_qpel_pos = qmin;
13529a84457aed4c45bc900998b5e11c03023264208James Dong    }
13629a84457aed4c45bc900998b5e11c03023264208James Dong
13729a84457aed4c45bc900998b5e11c03023264208James Dong    return satd_min;
13829a84457aed4c45bc900998b5e11c03023264208James Dong}
13929a84457aed4c45bc900998b5e11c03023264208James Dong
14029a84457aed4c45bc900998b5e11c03023264208James Dong
14129a84457aed4c45bc900998b5e11c03023264208James Dong
/** This function generates sub-pel predictions around the full-pel candidate.
Each sub-pel position array is 20 pixels wide (for word alignment) and 17 pixels tall.
NOTE(review): the code below steps these buffers with a 24-byte row stride;
confirm the width stated here. */
/** The sub-pel positions are labeled in a spiral manner from the center. */
14529a84457aed4c45bc900998b5e11c03023264208James Dong
14629a84457aed4c45bc900998b5e11c03023264208James Dongvoid GenerateHalfPelPred(uint8* subpel_pred, uint8 *ncand, int lx)
14729a84457aed4c45bc900998b5e11c03023264208James Dong{
14829a84457aed4c45bc900998b5e11c03023264208James Dong    /* let's do straightforward way first */
14929a84457aed4c45bc900998b5e11c03023264208James Dong    uint8 *ref;
15029a84457aed4c45bc900998b5e11c03023264208James Dong    uint8 *dst;
15129a84457aed4c45bc900998b5e11c03023264208James Dong    uint8 tmp8;
15229a84457aed4c45bc900998b5e11c03023264208James Dong    int32 tmp32;
15329a84457aed4c45bc900998b5e11c03023264208James Dong    int16 tmp_horz[18*22], *dst_16, *src_16;
15429a84457aed4c45bc900998b5e11c03023264208James Dong    register int a = 0, b = 0, c = 0, d = 0, e = 0, f = 0; // temp register
15529a84457aed4c45bc900998b5e11c03023264208James Dong    int msk;
15629a84457aed4c45bc900998b5e11c03023264208James Dong    int i, j;
15729a84457aed4c45bc900998b5e11c03023264208James Dong
15829a84457aed4c45bc900998b5e11c03023264208James Dong    /* first copy full-pel to the first array */
15929a84457aed4c45bc900998b5e11c03023264208James Dong    /* to be optimized later based on byte-offset load */
16029a84457aed4c45bc900998b5e11c03023264208James Dong    ref = ncand - 3 - lx - (lx << 1); /* move back (-3,-3) */
16129a84457aed4c45bc900998b5e11c03023264208James Dong    dst = subpel_pred;
16229a84457aed4c45bc900998b5e11c03023264208James Dong
16329a84457aed4c45bc900998b5e11c03023264208James Dong    dst -= 4; /* offset */
16429a84457aed4c45bc900998b5e11c03023264208James Dong    for (j = 0; j < 22; j++) /* 24x22 */
16529a84457aed4c45bc900998b5e11c03023264208James Dong    {
16629a84457aed4c45bc900998b5e11c03023264208James Dong        i = 6;
16729a84457aed4c45bc900998b5e11c03023264208James Dong        while (i > 0)
16829a84457aed4c45bc900998b5e11c03023264208James Dong        {
16929a84457aed4c45bc900998b5e11c03023264208James Dong            tmp32 = *ref++;
17029a84457aed4c45bc900998b5e11c03023264208James Dong            tmp8 = *ref++;
17129a84457aed4c45bc900998b5e11c03023264208James Dong            tmp32 |= (tmp8 << 8);
17229a84457aed4c45bc900998b5e11c03023264208James Dong            tmp8 = *ref++;
17329a84457aed4c45bc900998b5e11c03023264208James Dong            tmp32 |= (tmp8 << 16);
17429a84457aed4c45bc900998b5e11c03023264208James Dong            tmp8 = *ref++;
17529a84457aed4c45bc900998b5e11c03023264208James Dong            tmp32 |= (tmp8 << 24);
17629a84457aed4c45bc900998b5e11c03023264208James Dong            *((uint32*)(dst += 4)) = tmp32;
17729a84457aed4c45bc900998b5e11c03023264208James Dong            i--;
17829a84457aed4c45bc900998b5e11c03023264208James Dong        }
17929a84457aed4c45bc900998b5e11c03023264208James Dong        ref += (lx - 24);
18029a84457aed4c45bc900998b5e11c03023264208James Dong    }
18129a84457aed4c45bc900998b5e11c03023264208James Dong
18229a84457aed4c45bc900998b5e11c03023264208James Dong    /* from the first array, we do horizontal interp */
18329a84457aed4c45bc900998b5e11c03023264208James Dong    ref = subpel_pred + 2;
18429a84457aed4c45bc900998b5e11c03023264208James Dong    dst_16 = tmp_horz; /* 17 x 22 */
18529a84457aed4c45bc900998b5e11c03023264208James Dong
18629a84457aed4c45bc900998b5e11c03023264208James Dong    for (j = 4; j > 0; j--)
18729a84457aed4c45bc900998b5e11c03023264208James Dong    {
18829a84457aed4c45bc900998b5e11c03023264208James Dong        for (i = 16; i > 0; i -= 4)
18929a84457aed4c45bc900998b5e11c03023264208James Dong        {
19029a84457aed4c45bc900998b5e11c03023264208James Dong            a = ref[-2];
19129a84457aed4c45bc900998b5e11c03023264208James Dong            b = ref[-1];
19229a84457aed4c45bc900998b5e11c03023264208James Dong            c = ref[0];
19329a84457aed4c45bc900998b5e11c03023264208James Dong            d = ref[1];
19429a84457aed4c45bc900998b5e11c03023264208James Dong            e = ref[2];
19529a84457aed4c45bc900998b5e11c03023264208James Dong            f = ref[3];
19629a84457aed4c45bc900998b5e11c03023264208James Dong            *dst_16++ = a + f - 5 * (b + e) + 20 * (c + d);
19729a84457aed4c45bc900998b5e11c03023264208James Dong            a = ref[4];
19829a84457aed4c45bc900998b5e11c03023264208James Dong            *dst_16++ = b + a - 5 * (c + f) + 20 * (d + e);
19929a84457aed4c45bc900998b5e11c03023264208James Dong            b = ref[5];
20029a84457aed4c45bc900998b5e11c03023264208James Dong            *dst_16++ = c + b - 5 * (d + a) + 20 * (e + f);
20129a84457aed4c45bc900998b5e11c03023264208James Dong            c = ref[6];
20229a84457aed4c45bc900998b5e11c03023264208James Dong            *dst_16++ = d + c - 5 * (e + b) + 20 * (f + a);
20329a84457aed4c45bc900998b5e11c03023264208James Dong
20429a84457aed4c45bc900998b5e11c03023264208James Dong            ref += 4;
20529a84457aed4c45bc900998b5e11c03023264208James Dong        }
20629a84457aed4c45bc900998b5e11c03023264208James Dong        /* do the 17th column here */
20729a84457aed4c45bc900998b5e11c03023264208James Dong        d = ref[3];
20829a84457aed4c45bc900998b5e11c03023264208James Dong        *dst_16 =  e + d - 5 * (f + c) + 20 * (a + b);
20929a84457aed4c45bc900998b5e11c03023264208James Dong        dst_16 += 2; /* stride for tmp_horz is 18 */
21029a84457aed4c45bc900998b5e11c03023264208James Dong        ref += 8;  /* stride for ref is 24 */
21129a84457aed4c45bc900998b5e11c03023264208James Dong        if (j == 3)  // move 18 lines down
21229a84457aed4c45bc900998b5e11c03023264208James Dong        {
21329a84457aed4c45bc900998b5e11c03023264208James Dong            dst_16 += 324;//18*18;
21429a84457aed4c45bc900998b5e11c03023264208James Dong            ref += 432;//18*24;
21529a84457aed4c45bc900998b5e11c03023264208James Dong        }
21629a84457aed4c45bc900998b5e11c03023264208James Dong    }
21729a84457aed4c45bc900998b5e11c03023264208James Dong
21829a84457aed4c45bc900998b5e11c03023264208James Dong    ref -= 480;//20*24;
21929a84457aed4c45bc900998b5e11c03023264208James Dong    dst_16 -= 360;//20*18;
22029a84457aed4c45bc900998b5e11c03023264208James Dong    dst = subpel_pred + V0Q_H2Q * SUBPEL_PRED_BLK_SIZE; /* go to the 14th array 17x18*/
22129a84457aed4c45bc900998b5e11c03023264208James Dong
22229a84457aed4c45bc900998b5e11c03023264208James Dong    for (j = 18; j > 0; j--)
22329a84457aed4c45bc900998b5e11c03023264208James Dong    {
22429a84457aed4c45bc900998b5e11c03023264208James Dong        for (i = 16; i > 0; i -= 4)
22529a84457aed4c45bc900998b5e11c03023264208James Dong        {
22629a84457aed4c45bc900998b5e11c03023264208James Dong            a = ref[-2];
22729a84457aed4c45bc900998b5e11c03023264208James Dong            b = ref[-1];
22829a84457aed4c45bc900998b5e11c03023264208James Dong            c = ref[0];
22929a84457aed4c45bc900998b5e11c03023264208James Dong            d = ref[1];
23029a84457aed4c45bc900998b5e11c03023264208James Dong            e = ref[2];
23129a84457aed4c45bc900998b5e11c03023264208James Dong            f = ref[3];
23229a84457aed4c45bc900998b5e11c03023264208James Dong            tmp32 = a + f - 5 * (b + e) + 20 * (c + d);
23329a84457aed4c45bc900998b5e11c03023264208James Dong            *dst_16++ = tmp32;
23429a84457aed4c45bc900998b5e11c03023264208James Dong            tmp32 = (tmp32 + 16) >> 5;
23529a84457aed4c45bc900998b5e11c03023264208James Dong            CLIP_RESULT(tmp32)
23629a84457aed4c45bc900998b5e11c03023264208James Dong            *dst++ = tmp32;
23729a84457aed4c45bc900998b5e11c03023264208James Dong
23829a84457aed4c45bc900998b5e11c03023264208James Dong            a = ref[4];
23929a84457aed4c45bc900998b5e11c03023264208James Dong            tmp32 = b + a - 5 * (c + f) + 20 * (d + e);
24029a84457aed4c45bc900998b5e11c03023264208James Dong            *dst_16++ = tmp32;
24129a84457aed4c45bc900998b5e11c03023264208James Dong            tmp32 = (tmp32 + 16) >> 5;
24229a84457aed4c45bc900998b5e11c03023264208James Dong            CLIP_RESULT(tmp32)
24329a84457aed4c45bc900998b5e11c03023264208James Dong            *dst++ = tmp32;
24429a84457aed4c45bc900998b5e11c03023264208James Dong
24529a84457aed4c45bc900998b5e11c03023264208James Dong            b = ref[5];
24629a84457aed4c45bc900998b5e11c03023264208James Dong            tmp32 = c + b - 5 * (d + a) + 20 * (e + f);
24729a84457aed4c45bc900998b5e11c03023264208James Dong            *dst_16++ = tmp32;
24829a84457aed4c45bc900998b5e11c03023264208James Dong            tmp32 = (tmp32 + 16) >> 5;
24929a84457aed4c45bc900998b5e11c03023264208James Dong            CLIP_RESULT(tmp32)
25029a84457aed4c45bc900998b5e11c03023264208James Dong            *dst++ = tmp32;
25129a84457aed4c45bc900998b5e11c03023264208James Dong
25229a84457aed4c45bc900998b5e11c03023264208James Dong            c = ref[6];
25329a84457aed4c45bc900998b5e11c03023264208James Dong            tmp32 = d + c - 5 * (e + b) + 20 * (f + a);
25429a84457aed4c45bc900998b5e11c03023264208James Dong            *dst_16++ = tmp32;
25529a84457aed4c45bc900998b5e11c03023264208James Dong            tmp32 = (tmp32 + 16) >> 5;
25629a84457aed4c45bc900998b5e11c03023264208James Dong            CLIP_RESULT(tmp32)
25729a84457aed4c45bc900998b5e11c03023264208James Dong            *dst++ = tmp32;
25829a84457aed4c45bc900998b5e11c03023264208James Dong
25929a84457aed4c45bc900998b5e11c03023264208James Dong            ref += 4;
26029a84457aed4c45bc900998b5e11c03023264208James Dong        }
26129a84457aed4c45bc900998b5e11c03023264208James Dong        /* do the 17th column here */
26229a84457aed4c45bc900998b5e11c03023264208James Dong        d = ref[3];
26329a84457aed4c45bc900998b5e11c03023264208James Dong        tmp32 =  e + d - 5 * (f + c) + 20 * (a + b);
26429a84457aed4c45bc900998b5e11c03023264208James Dong        *dst_16 = tmp32;
26529a84457aed4c45bc900998b5e11c03023264208James Dong        tmp32 = (tmp32 + 16) >> 5;
26629a84457aed4c45bc900998b5e11c03023264208James Dong        CLIP_RESULT(tmp32)
26729a84457aed4c45bc900998b5e11c03023264208James Dong        *dst = tmp32;
26829a84457aed4c45bc900998b5e11c03023264208James Dong
26929a84457aed4c45bc900998b5e11c03023264208James Dong        dst += 8;  /* stride for dst is 24 */
27029a84457aed4c45bc900998b5e11c03023264208James Dong        dst_16 += 2; /* stride for tmp_horz is 18 */
27129a84457aed4c45bc900998b5e11c03023264208James Dong        ref += 8;  /* stride for ref is 24 */
27229a84457aed4c45bc900998b5e11c03023264208James Dong    }
27329a84457aed4c45bc900998b5e11c03023264208James Dong
27429a84457aed4c45bc900998b5e11c03023264208James Dong
27529a84457aed4c45bc900998b5e11c03023264208James Dong    /* Do middle point filtering*/
27629a84457aed4c45bc900998b5e11c03023264208James Dong    src_16 = tmp_horz; /* 17 x 22 */
27729a84457aed4c45bc900998b5e11c03023264208James Dong    dst = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE; /* 12th array 17x17*/
27829a84457aed4c45bc900998b5e11c03023264208James Dong    dst -= 24; // offset
27929a84457aed4c45bc900998b5e11c03023264208James Dong    for (i = 0; i < 17; i++)
28029a84457aed4c45bc900998b5e11c03023264208James Dong    {
28129a84457aed4c45bc900998b5e11c03023264208James Dong        for (j = 16; j > 0; j -= 4)
28229a84457aed4c45bc900998b5e11c03023264208James Dong        {
28329a84457aed4c45bc900998b5e11c03023264208James Dong            a = *src_16;
28429a84457aed4c45bc900998b5e11c03023264208James Dong            b = *(src_16 += 18);
28529a84457aed4c45bc900998b5e11c03023264208James Dong            c = *(src_16 += 18);
28629a84457aed4c45bc900998b5e11c03023264208James Dong            d = *(src_16 += 18);
28729a84457aed4c45bc900998b5e11c03023264208James Dong            e = *(src_16 += 18);
28829a84457aed4c45bc900998b5e11c03023264208James Dong            f = *(src_16 += 18);
28929a84457aed4c45bc900998b5e11c03023264208James Dong
29029a84457aed4c45bc900998b5e11c03023264208James Dong            tmp32 = a + f - 5 * (b + e) + 20 * (c + d);
29129a84457aed4c45bc900998b5e11c03023264208James Dong            tmp32 = (tmp32 + 512) >> 10;
29229a84457aed4c45bc900998b5e11c03023264208James Dong            CLIP_RESULT(tmp32)
29329a84457aed4c45bc900998b5e11c03023264208James Dong            *(dst += 24) = tmp32;
29429a84457aed4c45bc900998b5e11c03023264208James Dong
29529a84457aed4c45bc900998b5e11c03023264208James Dong            a = *(src_16 += 18);
29629a84457aed4c45bc900998b5e11c03023264208James Dong            tmp32 = b + a - 5 * (c + f) + 20 * (d + e);
29729a84457aed4c45bc900998b5e11c03023264208James Dong            tmp32 = (tmp32 + 512) >> 10;
29829a84457aed4c45bc900998b5e11c03023264208James Dong            CLIP_RESULT(tmp32)
29929a84457aed4c45bc900998b5e11c03023264208James Dong            *(dst += 24) = tmp32;
30029a84457aed4c45bc900998b5e11c03023264208James Dong
30129a84457aed4c45bc900998b5e11c03023264208James Dong            b = *(src_16 += 18);
30229a84457aed4c45bc900998b5e11c03023264208James Dong            tmp32 = c + b - 5 * (d + a) + 20 * (e + f);
30329a84457aed4c45bc900998b5e11c03023264208James Dong            tmp32 = (tmp32 + 512) >> 10;
30429a84457aed4c45bc900998b5e11c03023264208James Dong            CLIP_RESULT(tmp32)
30529a84457aed4c45bc900998b5e11c03023264208James Dong            *(dst += 24) = tmp32;
30629a84457aed4c45bc900998b5e11c03023264208James Dong
30729a84457aed4c45bc900998b5e11c03023264208James Dong            c = *(src_16 += 18);
30829a84457aed4c45bc900998b5e11c03023264208James Dong            tmp32 = d + c - 5 * (e + b) + 20 * (f + a);
30929a84457aed4c45bc900998b5e11c03023264208James Dong            tmp32 = (tmp32 + 512) >> 10;
31029a84457aed4c45bc900998b5e11c03023264208James Dong            CLIP_RESULT(tmp32)
31129a84457aed4c45bc900998b5e11c03023264208James Dong            *(dst += 24) = tmp32;
31229a84457aed4c45bc900998b5e11c03023264208James Dong
31329a84457aed4c45bc900998b5e11c03023264208James Dong            src_16 -= (18 << 2);
31429a84457aed4c45bc900998b5e11c03023264208James Dong        }
31529a84457aed4c45bc900998b5e11c03023264208James Dong
31629a84457aed4c45bc900998b5e11c03023264208James Dong        d = src_16[90]; // 18*5
31729a84457aed4c45bc900998b5e11c03023264208James Dong        tmp32 = e + d - 5 * (f + c) + 20 * (a + b);
31829a84457aed4c45bc900998b5e11c03023264208James Dong        tmp32 = (tmp32 + 512) >> 10;
31929a84457aed4c45bc900998b5e11c03023264208James Dong        CLIP_RESULT(tmp32)
32029a84457aed4c45bc900998b5e11c03023264208James Dong        dst[24] = tmp32;
32129a84457aed4c45bc900998b5e11c03023264208James Dong
32229a84457aed4c45bc900998b5e11c03023264208James Dong        src_16 -= ((18 << 4) - 1);
32329a84457aed4c45bc900998b5e11c03023264208James Dong        dst -= ((24 << 4) - 1);
32429a84457aed4c45bc900998b5e11c03023264208James Dong    }
32529a84457aed4c45bc900998b5e11c03023264208James Dong
32629a84457aed4c45bc900998b5e11c03023264208James Dong    /* do vertical interpolation */
32729a84457aed4c45bc900998b5e11c03023264208James Dong    ref = subpel_pred + 2;
32829a84457aed4c45bc900998b5e11c03023264208James Dong    dst = subpel_pred + V2Q_H0Q * SUBPEL_PRED_BLK_SIZE; /* 10th array 18x17 */
32929a84457aed4c45bc900998b5e11c03023264208James Dong    dst -= 24; // offset
33029a84457aed4c45bc900998b5e11c03023264208James Dong
33129a84457aed4c45bc900998b5e11c03023264208James Dong    for (i = 2; i > 0; i--)
33229a84457aed4c45bc900998b5e11c03023264208James Dong    {
33329a84457aed4c45bc900998b5e11c03023264208James Dong        for (j = 16; j > 0; j -= 4)
33429a84457aed4c45bc900998b5e11c03023264208James Dong        {
33529a84457aed4c45bc900998b5e11c03023264208James Dong            a = *ref;
33629a84457aed4c45bc900998b5e11c03023264208James Dong            b = *(ref += 24);
33729a84457aed4c45bc900998b5e11c03023264208James Dong            c = *(ref += 24);
33829a84457aed4c45bc900998b5e11c03023264208James Dong            d = *(ref += 24);
33929a84457aed4c45bc900998b5e11c03023264208James Dong            e = *(ref += 24);
34029a84457aed4c45bc900998b5e11c03023264208James Dong            f = *(ref += 24);
34129a84457aed4c45bc900998b5e11c03023264208James Dong
34229a84457aed4c45bc900998b5e11c03023264208James Dong            tmp32 = a + f - 5 * (b + e) + 20 * (c + d);
34329a84457aed4c45bc900998b5e11c03023264208James Dong            tmp32 = (tmp32 + 16) >> 5;
34429a84457aed4c45bc900998b5e11c03023264208James Dong            CLIP_RESULT(tmp32)
34529a84457aed4c45bc900998b5e11c03023264208James Dong            *(dst += 24) = tmp32;  // 10th
34629a84457aed4c45bc900998b5e11c03023264208James Dong
34729a84457aed4c45bc900998b5e11c03023264208James Dong            a = *(ref += 24);
34829a84457aed4c45bc900998b5e11c03023264208James Dong            tmp32 = b + a - 5 * (c + f) + 20 * (d + e);
34929a84457aed4c45bc900998b5e11c03023264208James Dong            tmp32 = (tmp32 + 16) >> 5;
35029a84457aed4c45bc900998b5e11c03023264208James Dong            CLIP_RESULT(tmp32)
35129a84457aed4c45bc900998b5e11c03023264208James Dong            *(dst += 24) = tmp32;  // 10th
35229a84457aed4c45bc900998b5e11c03023264208James Dong
35329a84457aed4c45bc900998b5e11c03023264208James Dong            b = *(ref += 24);
35429a84457aed4c45bc900998b5e11c03023264208James Dong            tmp32 = c + b - 5 * (d + a) + 20 * (e + f);
35529a84457aed4c45bc900998b5e11c03023264208James Dong            tmp32 = (tmp32 + 16) >> 5;
35629a84457aed4c45bc900998b5e11c03023264208James Dong            CLIP_RESULT(tmp32)
35729a84457aed4c45bc900998b5e11c03023264208James Dong            *(dst += 24) = tmp32;  // 10th
35829a84457aed4c45bc900998b5e11c03023264208James Dong
35929a84457aed4c45bc900998b5e11c03023264208James Dong            c = *(ref += 24);
36029a84457aed4c45bc900998b5e11c03023264208James Dong            tmp32 = d + c - 5 * (e + b) + 20 * (f + a);
36129a84457aed4c45bc900998b5e11c03023264208James Dong            tmp32 = (tmp32 + 16) >> 5;
36229a84457aed4c45bc900998b5e11c03023264208James Dong            CLIP_RESULT(tmp32)
36329a84457aed4c45bc900998b5e11c03023264208James Dong            *(dst += 24) = tmp32;  // 10th
36429a84457aed4c45bc900998b5e11c03023264208James Dong
36529a84457aed4c45bc900998b5e11c03023264208James Dong            ref -= (24 << 2);
36629a84457aed4c45bc900998b5e11c03023264208James Dong        }
36729a84457aed4c45bc900998b5e11c03023264208James Dong
36829a84457aed4c45bc900998b5e11c03023264208James Dong        d = ref[120]; // 24*5
36929a84457aed4c45bc900998b5e11c03023264208James Dong        tmp32 = e + d - 5 * (f + c) + 20 * (a + b);
37029a84457aed4c45bc900998b5e11c03023264208James Dong        tmp32 = (tmp32 + 16) >> 5;
37129a84457aed4c45bc900998b5e11c03023264208James Dong        CLIP_RESULT(tmp32)
37229a84457aed4c45bc900998b5e11c03023264208James Dong        dst[24] = tmp32;  // 10th
37329a84457aed4c45bc900998b5e11c03023264208James Dong
37429a84457aed4c45bc900998b5e11c03023264208James Dong        dst -= ((24 << 4) - 1);
37529a84457aed4c45bc900998b5e11c03023264208James Dong        ref -= ((24 << 4) - 1);
37629a84457aed4c45bc900998b5e11c03023264208James Dong    }
37729a84457aed4c45bc900998b5e11c03023264208James Dong
37829a84457aed4c45bc900998b5e11c03023264208James Dong    // note that using SIMD here doesn't help much, the cycle almost stays the same
37929a84457aed4c45bc900998b5e11c03023264208James Dong    // one can just use the above code and change the for(i=2 to for(i=18
38029a84457aed4c45bc900998b5e11c03023264208James Dong    for (i = 16; i > 0; i -= 4)
38129a84457aed4c45bc900998b5e11c03023264208James Dong    {
38229a84457aed4c45bc900998b5e11c03023264208James Dong        msk = 0;
38329a84457aed4c45bc900998b5e11c03023264208James Dong        for (j = 17; j > 0; j--)
38429a84457aed4c45bc900998b5e11c03023264208James Dong        {
38529a84457aed4c45bc900998b5e11c03023264208James Dong            a = *((uint32*)ref); /* load 4 bytes */
38629a84457aed4c45bc900998b5e11c03023264208James Dong            b = (a >> 8) & 0xFF00FF; /* second and fourth byte */
38729a84457aed4c45bc900998b5e11c03023264208James Dong            a &= 0xFF00FF;
38829a84457aed4c45bc900998b5e11c03023264208James Dong
38929a84457aed4c45bc900998b5e11c03023264208James Dong            c = *((uint32*)(ref + 120));
39029a84457aed4c45bc900998b5e11c03023264208James Dong            d = (c >> 8) & 0xFF00FF;
39129a84457aed4c45bc900998b5e11c03023264208James Dong            c &= 0xFF00FF;
39229a84457aed4c45bc900998b5e11c03023264208James Dong
39329a84457aed4c45bc900998b5e11c03023264208James Dong            a += c;
39429a84457aed4c45bc900998b5e11c03023264208James Dong            b += d;
39529a84457aed4c45bc900998b5e11c03023264208James Dong
39629a84457aed4c45bc900998b5e11c03023264208James Dong            e = *((uint32*)(ref + 72)); /* e, f */
39729a84457aed4c45bc900998b5e11c03023264208James Dong            f = (e >> 8) & 0xFF00FF;
39829a84457aed4c45bc900998b5e11c03023264208James Dong            e &= 0xFF00FF;
39929a84457aed4c45bc900998b5e11c03023264208James Dong
40029a84457aed4c45bc900998b5e11c03023264208James Dong            c = *((uint32*)(ref + 48)); /* c, d */
40129a84457aed4c45bc900998b5e11c03023264208James Dong            d = (c >> 8) & 0xFF00FF;
40229a84457aed4c45bc900998b5e11c03023264208James Dong            c &= 0xFF00FF;
40329a84457aed4c45bc900998b5e11c03023264208James Dong
40429a84457aed4c45bc900998b5e11c03023264208James Dong            c += e;
40529a84457aed4c45bc900998b5e11c03023264208James Dong            d += f;
40629a84457aed4c45bc900998b5e11c03023264208James Dong
40729a84457aed4c45bc900998b5e11c03023264208James Dong            a += 20 * c;
40829a84457aed4c45bc900998b5e11c03023264208James Dong            b += 20 * d;
40929a84457aed4c45bc900998b5e11c03023264208James Dong            a += 0x100010;
41029a84457aed4c45bc900998b5e11c03023264208James Dong            b += 0x100010;
41129a84457aed4c45bc900998b5e11c03023264208James Dong
41229a84457aed4c45bc900998b5e11c03023264208James Dong            e = *((uint32*)(ref += 24)); /* e, f */
41329a84457aed4c45bc900998b5e11c03023264208James Dong            f = (e >> 8) & 0xFF00FF;
41429a84457aed4c45bc900998b5e11c03023264208James Dong            e &= 0xFF00FF;
41529a84457aed4c45bc900998b5e11c03023264208James Dong
41629a84457aed4c45bc900998b5e11c03023264208James Dong            c = *((uint32*)(ref + 72)); /* c, d */
41729a84457aed4c45bc900998b5e11c03023264208James Dong            d = (c >> 8) & 0xFF00FF;
41829a84457aed4c45bc900998b5e11c03023264208James Dong            c &= 0xFF00FF;
41929a84457aed4c45bc900998b5e11c03023264208James Dong
42029a84457aed4c45bc900998b5e11c03023264208James Dong            c += e;
42129a84457aed4c45bc900998b5e11c03023264208James Dong            d += f;
42229a84457aed4c45bc900998b5e11c03023264208James Dong
42329a84457aed4c45bc900998b5e11c03023264208James Dong            a -= 5 * c;
42429a84457aed4c45bc900998b5e11c03023264208James Dong            b -= 5 * d;
42529a84457aed4c45bc900998b5e11c03023264208James Dong
42629a84457aed4c45bc900998b5e11c03023264208James Dong            c = a << 16;
42729a84457aed4c45bc900998b5e11c03023264208James Dong            d = b << 16;
42829a84457aed4c45bc900998b5e11c03023264208James Dong            CLIP_UPPER16(a)
42929a84457aed4c45bc900998b5e11c03023264208James Dong            CLIP_UPPER16(c)
43029a84457aed4c45bc900998b5e11c03023264208James Dong            CLIP_UPPER16(b)
43129a84457aed4c45bc900998b5e11c03023264208James Dong            CLIP_UPPER16(d)
43229a84457aed4c45bc900998b5e11c03023264208James Dong
43329a84457aed4c45bc900998b5e11c03023264208James Dong            a |= (c >> 16);
43429a84457aed4c45bc900998b5e11c03023264208James Dong            b |= (d >> 16);
43529a84457aed4c45bc900998b5e11c03023264208James Dong            //  a>>=5;
43629a84457aed4c45bc900998b5e11c03023264208James Dong            //  b>>=5;
43729a84457aed4c45bc900998b5e11c03023264208James Dong            /* clip */
43829a84457aed4c45bc900998b5e11c03023264208James Dong            //  msk |= b;  msk|=a;
43929a84457aed4c45bc900998b5e11c03023264208James Dong            //  a &= 0xFF00FF;
44029a84457aed4c45bc900998b5e11c03023264208James Dong            //  b &= 0xFF00FF;
44129a84457aed4c45bc900998b5e11c03023264208James Dong            a |= (b << 8);  /* pack it back */
44229a84457aed4c45bc900998b5e11c03023264208James Dong
44329a84457aed4c45bc900998b5e11c03023264208James Dong            *((uint16*)(dst += 24)) = a & 0xFFFF; //dst is not word-aligned.
44429a84457aed4c45bc900998b5e11c03023264208James Dong            *((uint16*)(dst + 2)) = a >> 16;
44529a84457aed4c45bc900998b5e11c03023264208James Dong
44629a84457aed4c45bc900998b5e11c03023264208James Dong        }
44729a84457aed4c45bc900998b5e11c03023264208James Dong        dst -= 404; // 24*17-4
44829a84457aed4c45bc900998b5e11c03023264208James Dong        ref -= 404;
44929a84457aed4c45bc900998b5e11c03023264208James Dong        /*      if(msk & 0xFF00FF00) // need clipping
45029a84457aed4c45bc900998b5e11c03023264208James Dong                {
45129a84457aed4c45bc900998b5e11c03023264208James Dong                    VertInterpWClip(dst,ref); // re-do 4 column with clip
45229a84457aed4c45bc900998b5e11c03023264208James Dong                }*/
45329a84457aed4c45bc900998b5e11c03023264208James Dong    }
45429a84457aed4c45bc900998b5e11c03023264208James Dong
45529a84457aed4c45bc900998b5e11c03023264208James Dong    return ;
45629a84457aed4c45bc900998b5e11c03023264208James Dong}
45729a84457aed4c45bc900998b5e11c03023264208James Dong
45829a84457aed4c45bc900998b5e11c03023264208James Dongvoid VertInterpWClip(uint8 *dst, uint8 *ref)
45929a84457aed4c45bc900998b5e11c03023264208James Dong{
46029a84457aed4c45bc900998b5e11c03023264208James Dong    int i, j;
46129a84457aed4c45bc900998b5e11c03023264208James Dong    int a, b, c, d, e, f;
46229a84457aed4c45bc900998b5e11c03023264208James Dong    int32 tmp32;
46329a84457aed4c45bc900998b5e11c03023264208James Dong
46429a84457aed4c45bc900998b5e11c03023264208James Dong    dst -= 4;
46529a84457aed4c45bc900998b5e11c03023264208James Dong    ref -= 4;
46629a84457aed4c45bc900998b5e11c03023264208James Dong
46729a84457aed4c45bc900998b5e11c03023264208James Dong    for (i = 4; i > 0; i--)
46829a84457aed4c45bc900998b5e11c03023264208James Dong    {
46929a84457aed4c45bc900998b5e11c03023264208James Dong        for (j = 16; j > 0; j -= 4)
47029a84457aed4c45bc900998b5e11c03023264208James Dong        {
47129a84457aed4c45bc900998b5e11c03023264208James Dong            a = *ref;
47229a84457aed4c45bc900998b5e11c03023264208James Dong            b = *(ref += 24);
47329a84457aed4c45bc900998b5e11c03023264208James Dong            c = *(ref += 24);
47429a84457aed4c45bc900998b5e11c03023264208James Dong            d = *(ref += 24);
47529a84457aed4c45bc900998b5e11c03023264208James Dong            e = *(ref += 24);
47629a84457aed4c45bc900998b5e11c03023264208James Dong            f = *(ref += 24);
47729a84457aed4c45bc900998b5e11c03023264208James Dong
47829a84457aed4c45bc900998b5e11c03023264208James Dong            tmp32 = a + f - 5 * (b + e) + 20 * (c + d);
47929a84457aed4c45bc900998b5e11c03023264208James Dong            tmp32 = (tmp32 + 16) >> 5;
48029a84457aed4c45bc900998b5e11c03023264208James Dong            CLIP_RESULT(tmp32)
48129a84457aed4c45bc900998b5e11c03023264208James Dong            *(dst += 24) = tmp32;  // 10th
48229a84457aed4c45bc900998b5e11c03023264208James Dong
48329a84457aed4c45bc900998b5e11c03023264208James Dong            a = *(ref += 24);
48429a84457aed4c45bc900998b5e11c03023264208James Dong            tmp32 = b + a - 5 * (c + f) + 20 * (d + e);
48529a84457aed4c45bc900998b5e11c03023264208James Dong            tmp32 = (tmp32 + 16) >> 5;
48629a84457aed4c45bc900998b5e11c03023264208James Dong            CLIP_RESULT(tmp32)
48729a84457aed4c45bc900998b5e11c03023264208James Dong            *(dst += 24) = tmp32;  // 10th
48829a84457aed4c45bc900998b5e11c03023264208James Dong
48929a84457aed4c45bc900998b5e11c03023264208James Dong            b = *(ref += 24);
49029a84457aed4c45bc900998b5e11c03023264208James Dong            tmp32 = c + b - 5 * (d + a) + 20 * (e + f);
49129a84457aed4c45bc900998b5e11c03023264208James Dong            tmp32 = (tmp32 + 16) >> 5;
49229a84457aed4c45bc900998b5e11c03023264208James Dong            CLIP_RESULT(tmp32)
49329a84457aed4c45bc900998b5e11c03023264208James Dong            *(dst += 24) = tmp32;  // 10th
49429a84457aed4c45bc900998b5e11c03023264208James Dong
49529a84457aed4c45bc900998b5e11c03023264208James Dong            c = *(ref += 24);
49629a84457aed4c45bc900998b5e11c03023264208James Dong            tmp32 = d + c - 5 * (e + b) + 20 * (f + a);
49729a84457aed4c45bc900998b5e11c03023264208James Dong            tmp32 = (tmp32 + 16) >> 5;
49829a84457aed4c45bc900998b5e11c03023264208James Dong            CLIP_RESULT(tmp32)
49929a84457aed4c45bc900998b5e11c03023264208James Dong            *(dst += 24) = tmp32;  // 10th
50029a84457aed4c45bc900998b5e11c03023264208James Dong
50129a84457aed4c45bc900998b5e11c03023264208James Dong            ref -= (24 << 2);
50229a84457aed4c45bc900998b5e11c03023264208James Dong        }
50329a84457aed4c45bc900998b5e11c03023264208James Dong
50429a84457aed4c45bc900998b5e11c03023264208James Dong        d = ref[120]; // 24*5
50529a84457aed4c45bc900998b5e11c03023264208James Dong        tmp32 = e + d - 5 * (f + c) + 20 * (a + b);
50629a84457aed4c45bc900998b5e11c03023264208James Dong        tmp32 = (tmp32 + 16) >> 5;
50729a84457aed4c45bc900998b5e11c03023264208James Dong        CLIP_RESULT(tmp32)
50829a84457aed4c45bc900998b5e11c03023264208James Dong        dst[24] = tmp32;  // 10th
50929a84457aed4c45bc900998b5e11c03023264208James Dong
51029a84457aed4c45bc900998b5e11c03023264208James Dong        dst -= ((24 << 4) - 1);
51129a84457aed4c45bc900998b5e11c03023264208James Dong        ref -= ((24 << 4) - 1);
51229a84457aed4c45bc900998b5e11c03023264208James Dong    }
51329a84457aed4c45bc900998b5e11c03023264208James Dong
51429a84457aed4c45bc900998b5e11c03023264208James Dong    return ;
51529a84457aed4c45bc900998b5e11c03023264208James Dong}
51629a84457aed4c45bc900998b5e11c03023264208James Dong
51729a84457aed4c45bc900998b5e11c03023264208James Dong
51829a84457aed4c45bc900998b5e11c03023264208James Dongvoid GenerateQuartPelPred(uint8 **bilin_base, uint8 *qpel_cand, int hpel_pos)
51929a84457aed4c45bc900998b5e11c03023264208James Dong{
52029a84457aed4c45bc900998b5e11c03023264208James Dong    // for even value of hpel_pos, start with pattern 1, otherwise, start with pattern 2
52129a84457aed4c45bc900998b5e11c03023264208James Dong    int i, j;
52229a84457aed4c45bc900998b5e11c03023264208James Dong
52329a84457aed4c45bc900998b5e11c03023264208James Dong    uint8 *c1 = qpel_cand;
52429a84457aed4c45bc900998b5e11c03023264208James Dong    uint8 *tl = bilin_base[0];
52529a84457aed4c45bc900998b5e11c03023264208James Dong    uint8 *tr = bilin_base[1];
52629a84457aed4c45bc900998b5e11c03023264208James Dong    uint8 *bl = bilin_base[2];
52729a84457aed4c45bc900998b5e11c03023264208James Dong    uint8 *br = bilin_base[3];
52829a84457aed4c45bc900998b5e11c03023264208James Dong    int a, b, c, d;
52929a84457aed4c45bc900998b5e11c03023264208James Dong    int offset = 1 - (384 * 7);
53029a84457aed4c45bc900998b5e11c03023264208James Dong
53129a84457aed4c45bc900998b5e11c03023264208James Dong    if (!(hpel_pos&1)) // diamond pattern
53229a84457aed4c45bc900998b5e11c03023264208James Dong    {
53329a84457aed4c45bc900998b5e11c03023264208James Dong        j = 16;
53429a84457aed4c45bc900998b5e11c03023264208James Dong        while (j--)
53529a84457aed4c45bc900998b5e11c03023264208James Dong        {
53629a84457aed4c45bc900998b5e11c03023264208James Dong            i = 16;
53729a84457aed4c45bc900998b5e11c03023264208James Dong            while (i--)
53829a84457aed4c45bc900998b5e11c03023264208James Dong            {
53929a84457aed4c45bc900998b5e11c03023264208James Dong                d = tr[24];
54029a84457aed4c45bc900998b5e11c03023264208James Dong                a = *tr++;
54129a84457aed4c45bc900998b5e11c03023264208James Dong                b = bl[1];
54229a84457aed4c45bc900998b5e11c03023264208James Dong                c = *br++;
54329a84457aed4c45bc900998b5e11c03023264208James Dong
54429a84457aed4c45bc900998b5e11c03023264208James Dong                *c1 = (c + a + 1) >> 1;
54529a84457aed4c45bc900998b5e11c03023264208James Dong                *(c1 += 384) = (b + a + 1) >> 1; /* c2 */
54629a84457aed4c45bc900998b5e11c03023264208James Dong                *(c1 += 384) = (b + c + 1) >> 1; /* c3 */
54729a84457aed4c45bc900998b5e11c03023264208James Dong                *(c1 += 384) = (b + d + 1) >> 1; /* c4 */
54829a84457aed4c45bc900998b5e11c03023264208James Dong
54929a84457aed4c45bc900998b5e11c03023264208James Dong                b = *bl++;
55029a84457aed4c45bc900998b5e11c03023264208James Dong
55129a84457aed4c45bc900998b5e11c03023264208James Dong                *(c1 += 384) = (c + d + 1) >> 1;  /* c5 */
55229a84457aed4c45bc900998b5e11c03023264208James Dong                *(c1 += 384) = (b + d + 1) >> 1;  /* c6 */
55329a84457aed4c45bc900998b5e11c03023264208James Dong                *(c1 += 384) = (b + c + 1) >> 1;  /* c7 */
55429a84457aed4c45bc900998b5e11c03023264208James Dong                *(c1 += 384) = (b + a + 1) >> 1;  /* c8 */
55529a84457aed4c45bc900998b5e11c03023264208James Dong
55629a84457aed4c45bc900998b5e11c03023264208James Dong                c1 += offset;
55729a84457aed4c45bc900998b5e11c03023264208James Dong            }
55829a84457aed4c45bc900998b5e11c03023264208James Dong            // advance to the next line, pitch is 24
55929a84457aed4c45bc900998b5e11c03023264208James Dong            tl += 8;
56029a84457aed4c45bc900998b5e11c03023264208James Dong            tr += 8;
56129a84457aed4c45bc900998b5e11c03023264208James Dong            bl += 8;
56229a84457aed4c45bc900998b5e11c03023264208James Dong            br += 8;
56329a84457aed4c45bc900998b5e11c03023264208James Dong            c1 += 8;
56429a84457aed4c45bc900998b5e11c03023264208James Dong        }
56529a84457aed4c45bc900998b5e11c03023264208James Dong    }
56629a84457aed4c45bc900998b5e11c03023264208James Dong    else // star pattern
56729a84457aed4c45bc900998b5e11c03023264208James Dong    {
56829a84457aed4c45bc900998b5e11c03023264208James Dong        j = 16;
56929a84457aed4c45bc900998b5e11c03023264208James Dong        while (j--)
57029a84457aed4c45bc900998b5e11c03023264208James Dong        {
57129a84457aed4c45bc900998b5e11c03023264208James Dong            i = 16;
57229a84457aed4c45bc900998b5e11c03023264208James Dong            while (i--)
57329a84457aed4c45bc900998b5e11c03023264208James Dong            {
57429a84457aed4c45bc900998b5e11c03023264208James Dong                a = *br++;
57529a84457aed4c45bc900998b5e11c03023264208James Dong                b = *tr++;
57629a84457aed4c45bc900998b5e11c03023264208James Dong                c = tl[1];
57729a84457aed4c45bc900998b5e11c03023264208James Dong                *c1 = (a + b + 1) >> 1;
57829a84457aed4c45bc900998b5e11c03023264208James Dong                b = bl[1];
57929a84457aed4c45bc900998b5e11c03023264208James Dong                *(c1 += 384) = (a + c + 1) >> 1; /* c2 */
58029a84457aed4c45bc900998b5e11c03023264208James Dong                c = tl[25];
58129a84457aed4c45bc900998b5e11c03023264208James Dong                *(c1 += 384) = (a + b + 1) >> 1; /* c3 */
58229a84457aed4c45bc900998b5e11c03023264208James Dong                b = tr[23];
58329a84457aed4c45bc900998b5e11c03023264208James Dong                *(c1 += 384) = (a + c + 1) >> 1; /* c4 */
58429a84457aed4c45bc900998b5e11c03023264208James Dong                c = tl[24];
58529a84457aed4c45bc900998b5e11c03023264208James Dong                *(c1 += 384) = (a + b + 1) >> 1; /* c5 */
58629a84457aed4c45bc900998b5e11c03023264208James Dong                b = *bl++;
58729a84457aed4c45bc900998b5e11c03023264208James Dong                *(c1 += 384) = (a + c + 1) >> 1; /* c6 */
58829a84457aed4c45bc900998b5e11c03023264208James Dong                c = *tl++;
58929a84457aed4c45bc900998b5e11c03023264208James Dong                *(c1 += 384) = (a + b + 1) >> 1; /* c7 */
59029a84457aed4c45bc900998b5e11c03023264208James Dong                *(c1 += 384) = (a + c + 1) >> 1; /* c8 */
59129a84457aed4c45bc900998b5e11c03023264208James Dong
59229a84457aed4c45bc900998b5e11c03023264208James Dong                c1 += offset;
59329a84457aed4c45bc900998b5e11c03023264208James Dong            }
59429a84457aed4c45bc900998b5e11c03023264208James Dong            // advance to the next line, pitch is 24
59529a84457aed4c45bc900998b5e11c03023264208James Dong            tl += 8;
59629a84457aed4c45bc900998b5e11c03023264208James Dong            tr += 8;
59729a84457aed4c45bc900998b5e11c03023264208James Dong            bl += 8;
59829a84457aed4c45bc900998b5e11c03023264208James Dong            br += 8;
59929a84457aed4c45bc900998b5e11c03023264208James Dong            c1 += 8;
60029a84457aed4c45bc900998b5e11c03023264208James Dong        }
60129a84457aed4c45bc900998b5e11c03023264208James Dong    }
60229a84457aed4c45bc900998b5e11c03023264208James Dong
60329a84457aed4c45bc900998b5e11c03023264208James Dong    return ;
60429a84457aed4c45bc900998b5e11c03023264208James Dong}
60529a84457aed4c45bc900998b5e11c03023264208James Dong
60629a84457aed4c45bc900998b5e11c03023264208James Dong
60729a84457aed4c45bc900998b5e11c03023264208James Dong/* assuming cand always has a pitch of 24 */
60829a84457aed4c45bc900998b5e11c03023264208James Dongint SATD_MB(uint8 *cand, uint8 *cur, int dmin)
60929a84457aed4c45bc900998b5e11c03023264208James Dong{
61029a84457aed4c45bc900998b5e11c03023264208James Dong    int cost;
61129a84457aed4c45bc900998b5e11c03023264208James Dong
61229a84457aed4c45bc900998b5e11c03023264208James Dong
61329a84457aed4c45bc900998b5e11c03023264208James Dong    dmin = (dmin << 16) | 24;
61429a84457aed4c45bc900998b5e11c03023264208James Dong    cost = AVCSAD_Macroblock_C(cand, cur, dmin, NULL);
61529a84457aed4c45bc900998b5e11c03023264208James Dong
61629a84457aed4c45bc900998b5e11c03023264208James Dong    return cost;
61729a84457aed4c45bc900998b5e11c03023264208James Dong}
61829a84457aed4c45bc900998b5e11c03023264208James Dong
61929a84457aed4c45bc900998b5e11c03023264208James Dong
62029a84457aed4c45bc900998b5e11c03023264208James Dong
62129a84457aed4c45bc900998b5e11c03023264208James Dong
62229a84457aed4c45bc900998b5e11c03023264208James Dong
623