13306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong/* ------------------------------------------------------------------
23306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong * Copyright (C) 1998-2009 PacketVideo
33306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong *
43306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong * Licensed under the Apache License, Version 2.0 (the "License");
53306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong * you may not use this file except in compliance with the License.
63306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong * You may obtain a copy of the License at
73306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong *
83306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong *      http://www.apache.org/licenses/LICENSE-2.0
93306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong *
103306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong * Unless required by applicable law or agreed to in writing, software
113306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong * distributed under the License is distributed on an "AS IS" BASIS,
123306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
133306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong * express or implied.
143306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong * See the License for the specific language governing permissions
153306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong * and limitations under the License.
163306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong * -------------------------------------------------------------------
173306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong */
183306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong/*
193306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong------------------------------------------------------------------------------
203306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong INPUT AND OUTPUT DEFINITIONS
213306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
223306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong Inputs:
233306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    xpos = x half-pixel of (x,y) coordinates within a VOP; motion
243306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong           compensated coordinates; native type
253306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    ypos = y half-pixel of (x,y) coordinates within a VOP; motion
263306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong           compensated coordinates; native type
273306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    comp = pointer to 8-bit compensated prediction values within a VOP;
283306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        computed by this module (i/o); full-pel resolution
293306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    c_prev = pointer to previous 8-bit prediction values within a VOP;
303306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong          values range from (0-255); full-pel resolution
313306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    sh_d = pointer to residual values used to compensate the predicted
323306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        value; values range from (-512 to 511); full-pel resolution
333306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    width = width of the VOP in pixels (x axis); full-pel resolution
343306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    rnd1 = rounding value for case when one dimension uses half-pel
353306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong           resolution
    rnd2 = rounding value for case when two dimensions use half-pel
           resolution
383306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    CBP = flag indicating whether residual is all zeros
393306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong          (0 -> all zeros, 1 -> not all zeros)
403306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        outside_flag = flag indicating whether motion vector is outside the
413306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong               VOP (0 -> inside, 1 -> outside)
423306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
433306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong Outputs:
443306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    returns 1
453306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
463306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong Local Stores/Buffers/Pointers Needed:
473306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    None
483306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
493306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong Global Stores/Buffers/Pointers Needed:
503306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    None
513306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
523306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong Pointers and Buffers Modified:
533306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    comp = buffer contains newly computed compensated prediction values
543306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
553306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong Local Stores Modified:
563306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    None
573306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
583306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong Global Stores Modified:
593306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    None
603306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
613306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong------------------------------------------------------------------------------
623306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong FUNCTION DESCRIPTION
633306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
643306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong Compute pixel values for a block in the current VOP. The prediction
653306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong values are generated by averaging pixel values in the previous VOP; the
663306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong block position in the previous frame is computed from the current block's
 motion vector. The final pixel values are then obtained by adding the
 prediction values to the block residual values.
693306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
703306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
713306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong------------------------------------------------------------------------------
723306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong*/
733306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
743306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong/*----------------------------------------------------------------------------
753306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong; INCLUDES
763306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong----------------------------------------------------------------------------*/
#include <string.h>

#include "mp4dec_lib.h"
#include "motion_comp.h"
793306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
803306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong#define OSCL_DISABLE_WARNING_CONV_POSSIBLE_LOSS_OF_DATA
813306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
823306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dongint GetPredAdvancedBy0x0(
833306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    uint8 *prev,        /* i */
843306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    uint8 *pred_block,      /* i */
853306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    int width,      /* i */
863306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    int pred_width_rnd /* i */
873306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong)
883306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong{
893306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    uint    i;      /* loop variable */
903306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    int offset, offset2;
913306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    uint32  pred_word, word1, word2;
923306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    int tmp;
933306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
943306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    /* initialize offset to adjust pixel counter */
953306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    /*    the next row; full-pel resolution      */
963306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    offset = width - B_SIZE; /* offset for prev */
973306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    offset2 = (pred_width_rnd >> 1) - 4; /* offset for pred_block */
983306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
99377b2ec9a2885f9b6405b07ba900a9e3f4349c38Kévin PETIT    tmp = (uintptr_t)prev & 0x3;
1003306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    pred_block -= offset2; /* preset */
1013306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
1023306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    if (tmp == 0)  /* word-aligned */
1033306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    {
1043306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        for (i = B_SIZE; i > 0; i--)
1053306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        {
1063306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            *((uint32*)(pred_block += offset2)) = *((uint32*)prev);
1073306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            *((uint32*)(pred_block += 4)) = *((uint32*)(prev + 4));
1083306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            prev += width;
1093306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        }
1103306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        return 1;
1113306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    }
1123306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    else if (tmp == 1) /* first position */
1133306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    {
1143306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        prev--; /* word-aligned */
1153306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
1163306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        for (i = B_SIZE; i > 0; i--)
1173306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        {
1183306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            word1 = *((uint32*)prev); /* read 4 bytes, b4 b3 b2 b1 */
1193306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            word2 = *((uint32*)(prev += 4));  /* read 4 bytes, b8 b7 b6 b5 */
1203306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            word1 >>= 8; /* 0 b4 b3 b2 */
1213306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            pred_word = word1 | (word2 << 24);  /* b5 b4 b3 b2 */
1223306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            *((uint32*)(pred_block += offset2)) = pred_word;
1233306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
1243306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            word1 = *((uint32*)(prev += 4)); /* b12 b11 b10 b9 */
1253306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            word2 >>= 8; /* 0 b8 b7 b6 */
1263306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            pred_word = word2 | (word1 << 24); /* b9 b8 b7 b6 */
1273306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            *((uint32*)(pred_block += 4)) = pred_word;
1283306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
1293306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            prev += offset;
1303306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        }
1313306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
1323306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        return 1;
1333306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    }
1343306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    else if (tmp == 2) /* second position */
1353306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    {
1363306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        prev -= 2; /* word1-aligned */
1373306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
1383306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        for (i = B_SIZE; i > 0; i--)
1393306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        {
1403306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            word1 = *((uint32*)prev); /* read 4 bytes, b4 b3 b2 b1 */
1413306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            word2 = *((uint32*)(prev += 4));  /* read 4 bytes, b8 b7 b6 b5 */
1423306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            word1 >>= 16; /* 0 0 b4 b3 */
1433306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            pred_word = word1 | (word2 << 16);  /* b6 b5 b4 b3 */
1443306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            *((uint32*)(pred_block += offset2)) = pred_word;
1453306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
1463306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            word1 = *((uint32*)(prev += 4)); /* b12 b11 b10 b9 */
1473306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            word2 >>= 16; /* 0 0 b8 b7 */
1483306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            pred_word = word2 | (word1 << 16); /* b10 b9 b8 b7 */
1493306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            *((uint32*)(pred_block += 4)) = pred_word;
1503306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
1513306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
1523306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            prev += offset;
1533306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        }
1543306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
1553306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        return 1;
1563306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    }
1573306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    else /* third position */
1583306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    {
1593306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        prev -= 3; /* word1-aligned */
1603306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
1613306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        for (i = B_SIZE; i > 0; i--)
1623306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        {
1633306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            word1 = *((uint32*)prev); /* read 4 bytes, b4 b3 b2 b1 */
1643306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            word2 = *((uint32*)(prev += 4));  /* read 4 bytes, b8 b7 b6 b5 */
1653306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            word1 >>= 24; /* 0 0 0 b4 */
1663306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            pred_word = word1 | (word2 << 8);   /* b7 b6 b5 b4 */
1673306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            *((uint32*)(pred_block += offset2)) = pred_word;
1683306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
1693306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            word1 = *((uint32*)(prev += 4)); /* b12 b11 b10 b9 */
1703306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            word2 >>= 24; /* 0 0 0 b8 */
1713306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            pred_word = word2 | (word1 << 8); /* b11 b10 b9 b8 */
1723306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            *((uint32*)(pred_block += 4)) = pred_word;
1733306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
1743306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            prev += offset;
1753306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        }
1763306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
1773306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        return 1;
1783306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    }
1793306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong}
1803306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
1813306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong/**************************************************************************/
1823306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dongint GetPredAdvancedBy0x1(
1833306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    uint8 *prev,        /* i */
1843306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    uint8 *pred_block,      /* i */
1853306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    int width,      /* i */
1863306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    int pred_width_rnd /* i */
1873306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong)
1883306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong{
1893306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    uint    i;      /* loop variable */
1903306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    int offset, offset2;
1913306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    uint32 word1, word2, word3, word12;
1923306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    int tmp;
1933306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    int rnd1;
1943306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    uint32 mask;
1953306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
1963306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    /* initialize offset to adjust pixel counter */
1973306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    /*    the next row; full-pel resolution      */
1983306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    offset = width - B_SIZE; /* offset for prev */
1993306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    offset2 = (pred_width_rnd >> 1) - 4; /* offset of pred_block */
2003306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
2013306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    rnd1 = pred_width_rnd & 1;
2023306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
2033306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    /* Branch based on pixel location (half-pel or full-pel) for x and y */
2043306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    pred_block -= offset2; /* preset */
2053306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
206377b2ec9a2885f9b6405b07ba900a9e3f4349c38Kévin PETIT    tmp = (uintptr_t)prev & 3;
2073306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    mask = 254;
2083306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    mask |= (mask << 8);
2093306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    mask |= (mask << 16); /* 0xFEFEFEFE */
2103306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
2113306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    if (tmp == 0) /* word-aligned */
2123306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    {
2133306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        if (rnd1 == 1)
2143306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        {
2153306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            for (i = B_SIZE; i > 0; i--)
2163306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            {
2173306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 = *((uint32*)prev); /* b4 b3 b2 b1 */
2183306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word2 = *((uint32*)(prev += 4)); /* b8 b7 b6 b5 */
2193306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word12 = (word1 >> 8); /* 0 b4 b3 b2 */
2203306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word12 |= (word2 << 24); /* b5 b4 b3 b2 */
2213306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word3 = word1 | word12; // rnd1 = 1; otherwise word3 = word1&word12
2223306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 &= mask;
2233306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word3 &= (~mask); /* 0x1010101, check last bit */
2243306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word12 &= mask;
2253306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 >>= 1;
2263306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 = word1 + (word12 >> 1);
2273306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 += word3;
2283306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                *((uint32*)(pred_block += offset2)) = word1; /* write 4 pixels */
2293306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
2303306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 = *((uint32*)(prev += 4)); /* b12 b11 b10 b9 */
2313306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word12 = (word2 >> 8); /* 0 b8 b7 b6 */
2323306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word12 |= (word1 << 24); /* b9 b8 b7 b6 */
2333306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word3 = word2 | word12;
2343306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word2 &= mask;
2353306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word3 &= (~mask);  /* 0x1010101, check last bit */
2363306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word12 &= mask;
2373306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word2 >>= 1;
2383306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word2 = word2 + (word12 >> 1);
2393306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word2 += word3;
2403306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                *((uint32*)(pred_block += 4)) = word2; /* write 4 pixels */
2413306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
2423306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                prev += offset;
2433306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            }
2443306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            return 1;
2453306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        }
2463306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        else /* rnd1 == 0 */
2473306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        {
2483306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            for (i = B_SIZE; i > 0; i--)
2493306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            {
2503306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 = *((uint32*)prev); /* b4 b3 b2 b1 */
2513306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
2523306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word2 = *((uint32*)(prev += 4)); /* b8 b7 b6 b5 */
2533306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word12 = (word1 >> 8); /* 0 b4 b3 b2 */
2543306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word12 |= (word2 << 24); /* b5 b4 b3 b2 */
2553306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word3 = word1 & word12; // rnd1 = 1; otherwise word3 = word1&word12
2563306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 &= mask;
2573306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word3 &= (~mask); /* 0x1010101, check last bit */
2583306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word12 &= mask;
2593306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 >>= 1;
2603306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 = word1 + (word12 >> 1);
2613306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 += word3;
2623306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                *((uint32*)(pred_block += offset2)) = word1; /* write 4 pixels */
2633306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
2643306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 = *((uint32*)(prev += 4)); /* b12 b11 b10 b9 */
2653306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word12 = (word2 >> 8); /* 0 b8 b7 b6 */
2663306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word12 |= (word1 << 24); /* b9 b8 b7 b6 */
2673306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word3 = word2 & word12;
2683306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word2 &= mask;
2693306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word3 &= (~mask);  /* 0x1010101, check last bit */
2703306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word12 &= mask;
2713306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word2 >>= 1;
2723306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word2 = word2 + (word12 >> 1);
2733306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word2 += word3;
2743306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                *((uint32*)(pred_block += 4)) = word2; /* write 4 pixels */
2753306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
2763306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                prev += offset;
2773306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            }
2783306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            return 1;
2793306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        } /* rnd1 */
2803306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    }
2813306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    else if (tmp == 1)
2823306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    {
2833306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        prev--; /* word-aligned */
2843306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        if (rnd1 == 1)
2853306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        {
2863306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            for (i = B_SIZE; i > 0; i--)
2873306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            {
2883306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 = *((uint32*)prev); /* b3 b2 b1 b0 */
2893306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word2 = *((uint32*)(prev += 4)); /* b7 b6 b5 b4 */
2903306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word12 = (word1 >> 8); /* 0 b3 b2 b1 */
2913306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 >>= 16; /* 0 0 b3 b2 */
2923306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word12 |= (word2 << 24); /* b4 b3 b2 b1 */
2933306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 |= (word2 << 16); /* b5 b4 b3 b2 */
2943306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word3 = word1 | word12; // rnd1 = 1; otherwise word3 = word1&word12
2953306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 &= mask;
2963306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word3 &= (~mask); /* 0x1010101, check last bit */
2973306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word12 &= mask;
2983306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 >>= 1;
2993306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 = word1 + (word12 >> 1);
3003306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 += word3;
3013306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                *((uint32*)(pred_block += offset2)) = word1; /* write 4 pixels */
3023306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
3033306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 = *((uint32*)(prev += 4)); /* b11 b10 b9 b8 */
3043306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word12 = (word2 >> 8); /* 0 b7 b6 b5 */
3053306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word2 >>= 16; /* 0 0 b7 b6 */
3063306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word12 |= (word1 << 24); /* b8 b7 b6 b5 */
3073306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word2 |= (word1 << 16); /* b9 b8 b7 b6 */
3083306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word3 = word2 | word12; // rnd1 = 1; otherwise word3 = word2&word12
3093306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word2 &= mask;
3103306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word3 &= (~mask); /* 0x1010101, check last bit */
3113306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word12 &= mask;
3123306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word2 >>= 1;
3133306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word2 = word2 + (word12 >> 1);
3143306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word2 += word3;
3153306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                *((uint32*)(pred_block += 4)) = word2; /* write 4 pixels */
3163306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
3173306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                prev += offset;
3183306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            }
3193306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            return 1;
3203306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        }
3213306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        else /* rnd1 = 0 */
3223306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        {
3233306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            for (i = B_SIZE; i > 0; i--)
3243306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            {
3253306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 = *((uint32*)prev); /* b3 b2 b1 b0 */
3263306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
3273306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word2 = *((uint32*)(prev += 4)); /* b7 b6 b5 b4 */
3283306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word12 = (word1 >> 8); /* 0 b3 b2 b1 */
3293306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 >>= 16; /* 0 0 b3 b2 */
3303306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word12 |= (word2 << 24); /* b4 b3 b2 b1 */
3313306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 |= (word2 << 16); /* b5 b4 b3 b2 */
3323306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word3 = word1 & word12;
3333306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 &= mask;
3343306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word3 &= (~mask); /* 0x1010101, check last bit */
3353306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word12 &= mask;
3363306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 >>= 1;
3373306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 = word1 + (word12 >> 1);
3383306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 += word3;
3393306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                *((uint32*)(pred_block += offset2)) = word1; /* write 4 pixels */
3403306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
3413306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 = *((uint32*)(prev += 4)); /* b11 b10 b9 b8 */
3423306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word12 = (word2 >> 8); /* 0 b7 b6 b5 */
3433306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word2 >>= 16; /* 0 0 b7 b6 */
3443306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word12 |= (word1 << 24); /* b8 b7 b6 b5 */
3453306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word2 |= (word1 << 16); /* b9 b8 b7 b6 */
3463306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word3 = word2 & word12;
3473306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word2 &= mask;
3483306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word3 &= (~mask); /* 0x1010101, check last bit */
3493306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word12 &= mask;
3503306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word2 >>= 1;
3513306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word2 = word2 + (word12 >> 1);
3523306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word2 += word3;
3533306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                *((uint32*)(pred_block += 4)) = word2; /* write 4 pixels */
3543306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
3553306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                prev += offset;
3563306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            }
3573306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            return 1;
3583306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        } /* rnd1 */
3593306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    }
3603306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    else if (tmp == 2)
3613306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    {
3623306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        prev -= 2; /* word-aligned */
3633306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        if (rnd1 == 1)
3643306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        {
3653306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            for (i = B_SIZE; i > 0; i--)
3663306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            {
3673306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 = *((uint32*)prev); /* b2 b1 b0 bN1 */
3683306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word2 = *((uint32*)(prev += 4)); /* b6 b5 b4 b3 */
3693306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word12 = (word1 >> 16); /* 0 0 b2 b1 */
3703306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 >>= 24; /* 0 0 0 b2 */
3713306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word12 |= (word2 << 16); /* b4 b3 b2 b1 */
3723306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 |= (word2 << 8); /* b5 b4 b3 b2 */
3733306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word3 = word1 | word12; // rnd1 = 1; otherwise word3 = word1&word12
3743306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 &= mask;
3753306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word3 &= (~mask); /* 0x1010101, check last bit */
3763306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word12 &= mask;
3773306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 >>= 1;
3783306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 = word1 + (word12 >> 1);
3793306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 += word3;
3803306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                *((uint32*)(pred_block += offset2)) = word1; /* write 4 pixels */
3813306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
3823306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 = *((uint32*)(prev += 4)); /* b10 b9 b8 b7 */
3833306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word12 = (word2 >> 16); /* 0 0 b6 b5 */
3843306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word2 >>= 24; /* 0 0 0 b6 */
3853306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word12 |= (word1 << 16); /* b8 b7 b6 b5 */
3863306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word2 |= (word1 << 8); /* b9 b8 b7 b6 */
3873306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word3 = word2 | word12; // rnd1 = 1; otherwise word3 = word1&word12
3883306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word2 &= mask;
3893306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word3 &= (~mask); /* 0x1010101, check last bit */
3903306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word12 &= mask;
3913306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word2 >>= 1;
3923306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word2 = word2 + (word12 >> 1);
3933306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word2 += word3;
3943306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                *((uint32*)(pred_block += 4)) = word2; /* write 4 pixels */
3953306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                prev += offset;
3963306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            }
3973306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            return 1;
3983306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        }
3993306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        else /* rnd1 == 0 */
4003306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        {
4013306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            for (i = B_SIZE; i > 0; i--)
4023306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            {
4033306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 = *((uint32*)prev); /* b2 b1 b0 bN1 */
4043306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word2 = *((uint32*)(prev += 4)); /* b6 b5 b4 b3 */
4053306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word12 = (word1 >> 16); /* 0 0 b2 b1 */
4063306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 >>= 24; /* 0 0 0 b2 */
4073306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word12 |= (word2 << 16); /* b4 b3 b2 b1 */
4083306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 |= (word2 << 8); /* b5 b4 b3 b2 */
4093306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word3 = word1 & word12; // rnd1 = 0: AND the low bits (average rounds down); the rnd1 = 1 path uses word1|word12
4103306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 &= mask;
4113306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word3 &= (~mask); /* 0x1010101, check last bit */
4123306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word12 &= mask;
4133306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 >>= 1;
4143306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 = word1 + (word12 >> 1);
4153306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 += word3;
4163306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                *((uint32*)(pred_block += offset2)) = word1; /* write 4 pixels */
4173306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
4183306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 = *((uint32*)(prev += 4)); /* b10 b9 b8 b7 */
4193306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word12 = (word2 >> 16); /* 0 0 b6 b5 */
4203306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word2 >>= 24; /* 0 0 0 b6 */
4213306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word12 |= (word1 << 16); /* b8 b7 b6 b5 */
4223306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word2 |= (word1 << 8); /* b9 b8 b7 b6 */
4233306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word3 = word2 & word12; // rnd1 = 0: AND the low bits (average rounds down); the rnd1 = 1 path uses word2|word12
4243306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word2 &= mask;
4253306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word3 &= (~mask); /* 0x1010101, check last bit */
4263306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word12 &= mask;
4273306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word2 >>= 1;
4283306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word2 = word2 + (word12 >> 1);
4293306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word2 += word3;
4303306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                *((uint32*)(pred_block += 4)) = word2; /* write 4 pixels */
4313306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                prev += offset;
4323306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            }
4333306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            return 1;
4343306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        }
4353306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    }
4363306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    else /* tmp = 3 */
4373306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    {
4383306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        prev -= 3; /* word-aligned */
4393306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        if (rnd1 == 1)
4403306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        {
4413306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            for (i = B_SIZE; i > 0; i--)
4423306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            {
4433306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 = *((uint32*)prev); /* b1 b0 bN1 bN2 */
4443306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word2 = *((uint32*)(prev += 4)); /* b5 b4 b3 b2 */
4453306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word12 = (word1 >> 24); /* 0 0 0 b1 */
4463306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word12 |= (word2 << 8); /* b4 b3 b2 b1 */
4473306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 = word2;
4483306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word3 = word1 | word12; // rnd1 = 1; otherwise word3 = word1&word12
4493306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 &= mask;
4503306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word3 &= (~mask); /* 0x1010101, check last bit */
4513306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word12 &= mask;
4523306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 >>= 1;
4533306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 = word1 + (word12 >> 1);
4543306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 += word3;
4553306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                *((uint32*)(pred_block += offset2)) = word1; /* write 4 pixels */
4563306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
4573306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 = *((uint32*)(prev += 4)); /* b9 b8 b7 b6 */
4583306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word12 = (word2 >> 24); /* 0 0 0 b5 */
4593306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word12 |= (word1 << 8); /* b8 b7 b6 b5 */
4603306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word2 = word1; /* b9 b8 b7 b6 */
4613306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word3 = word2 | word12; // rnd1 = 1; otherwise word3 = word1&word12
4623306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word2 &= mask;
4633306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word3 &= (~mask); /* 0x1010101, check last bit */
4643306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word12 &= mask;
4653306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word2 >>= 1;
4663306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word2 = word2 + (word12 >> 1);
4673306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word2 += word3;
4683306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                *((uint32*)(pred_block += 4)) = word2; /* write 4 pixels */
4693306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                prev += offset;
4703306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            }
4713306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            return 1;
4723306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        }
4733306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        else
4743306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        {
4753306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            for (i = B_SIZE; i > 0; i--)
4763306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            {
4773306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 = *((uint32*)prev); /* b1 b0 bN1 bN2 */
4783306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word2 = *((uint32*)(prev += 4)); /* b5 b4 b3 b2 */
4793306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word12 = (word1 >> 24); /* 0 0 0 b1 */
4803306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word12 |= (word2 << 8); /* b4 b3 b2 b1 */
4813306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 = word2;
4823306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word3 = word1 & word12; // rnd1 = 0: AND the low bits (average rounds down); the rnd1 = 1 path uses word1|word12
4833306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 &= mask;
4843306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word3 &= (~mask); /* 0x1010101, check last bit */
4853306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word12 &= mask;
4863306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 >>= 1;
4873306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 = word1 + (word12 >> 1);
4883306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 += word3;
4893306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                *((uint32*)(pred_block += offset2)) = word1; /* write 4 pixels */
4903306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
4913306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 = *((uint32*)(prev += 4)); /* b9 b8 b7 b6 */
4923306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word12 = (word2 >> 24); /* 0 0 0 b5 */
4933306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word12 |= (word1 << 8); /* b8 b7 b6 b5 */
4943306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word2 = word1; /* b9 b8 b7 b6 */
4953306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word3 = word2 & word12; // rnd1 = 0: AND the low bits (average rounds down); the rnd1 = 1 path uses word2|word12
4963306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word2 &= mask;
4973306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word3 &= (~mask); /* 0x1010101, check last bit */
4983306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word12 &= mask;
4993306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word2 >>= 1;
5003306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word2 = word2 + (word12 >> 1);
5013306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word2 += word3;
5023306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                *((uint32*)(pred_block += 4)) = word2; /* write 4 pixels */
5033306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                prev += offset;
5043306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            }
5053306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            return 1;
5063306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        }
5073306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    }
5083306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong}
5093306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
5103306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong/**************************************************************************/
5113306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dongint GetPredAdvancedBy1x0(
5123306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    uint8 *prev,        /* i */
5133306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    uint8 *pred_block,      /* o: destination block, written one 32-bit word (4 pixels) at a time */
5143306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    int width,      /* i */
5153306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    int pred_width_rnd /* i */
5163306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong)
5173306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong{
5183306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    uint    i;      /* loop variable */
5193306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    int offset, offset2;
5203306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    uint32  word1, word2, word3, word12, word22;
5213306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    int tmp;
5223306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    int rnd1;
5233306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    uint32 mask;
5243306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
5253306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    /* initialize offset to adjust pixel counter */
5263306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    /*    the next row; full-pel resolution      */
5273306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    offset = width - B_SIZE; /* offset for prev */
5283306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    offset2 = (pred_width_rnd >> 1) - 4; /* offset for pred_block */
5293306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
5303306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    rnd1 = pred_width_rnd & 1;
5313306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
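5323306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    /* Pre-bias pred_block so each loop's leading "pred_block += offset2" lands on the first output word; the branches below are selected by the byte alignment of prev and by rnd1 */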
5333306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    pred_block -= offset2; /* preset */
5343306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
535377b2ec9a2885f9b6405b07ba900a9e3f4349c38Kévin PETIT    tmp = (uintptr_t)prev & 3;
5363306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    mask = 254;
5373306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    mask |= (mask << 8);
5383306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    mask |= (mask << 16); /* 0xFEFEFEFE */
5393306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
5403306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    if (tmp == 0) /* word-aligned */
5413306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    {
5423306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        prev -= 4;
5433306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        if (rnd1 == 1)
5443306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        {
5453306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            for (i = B_SIZE; i > 0; i--)
5463306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            {
5473306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 = *((uint32*)(prev += 4));
5483306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word2 = *((uint32*)(prev + width));
5493306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word3 = word1 | word2; // rnd1 = 1; otherwise word3 = word1&word2
5503306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 &= mask;
5513306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word3 &= (~mask); /* 0x1010101, check last bit */
5523306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word2 &= mask;
5533306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 >>= 1;
5543306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 = word1 + (word2 >> 1);
5553306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 += word3;
5563306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                *((uint32*)(pred_block += offset2)) = word1;
5573306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 = *((uint32*)(prev += 4));
5583306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word2 = *((uint32*)(prev + width));
5593306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word3 = word1 | word2; // rnd1 = 1; otherwise word3 = word1&word2
5603306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 &= mask;
5613306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word3 &= (~mask); /* 0x1010101, check last bit */
5623306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word2 &= mask;
5633306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 >>= 1;
5643306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 = word1 + (word2 >> 1);
5653306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 += word3;
5663306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                *((uint32*)(pred_block += 4)) = word1;
5673306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
5683306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                prev += offset;
5693306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            }
5703306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            return 1;
5713306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        }
5723306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        else   /* rnd1 = 0 */
5733306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        {
5743306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            for (i = B_SIZE; i > 0; i--)
5753306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            {
5763306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 = *((uint32*)(prev += 4));
5773306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word2 = *((uint32*)(prev + width));
5783306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word3 = word1 & word2;  /* rnd1 = 0; */
5793306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 &= mask;
5803306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word3 &= (~mask); /* 0x1010101, check last bit */
5813306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word2 &= mask;
5823306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 >>= 1;
5833306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 = word1 + (word2 >> 1);
5843306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 += word3;
5853306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                *((uint32*)(pred_block += offset2)) = word1;
5863306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 = *((uint32*)(prev += 4));
5873306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word2 = *((uint32*)(prev + width));
5883306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word3 = word1 & word2;  /* rnd1 = 0; */
5893306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 &= mask;
5903306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word3 &= (~mask); /* 0x1010101, check last bit */
5913306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word2 &= mask;
5923306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 >>= 1;
5933306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 = word1 + (word2 >> 1);
5943306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 += word3;
5953306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                *((uint32*)(pred_block += 4)) = word1;
5963306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
5973306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                prev += offset;
5983306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            }
5993306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            return 1;
6003306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        }
6013306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    }
6023306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    else if (tmp == 1)
6033306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    {
6043306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        prev--; /* word-aligned */
6053306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        if (rnd1 == 1)
6063306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        {
6073306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            for (i = B_SIZE; i > 0; i--)
6083306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            {
6093306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word12 = *((uint32*)prev); /* read b4 b3 b2 b1 */
6103306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word22 = *((uint32*)(prev + width));
6113306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
6123306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 = *((uint32*)(prev += 4)); /* read b8 b7 b6 b5 */
6133306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word2 = *((uint32*)(prev + width));
6143306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word12 >>= 8; /* 0 b4 b3 b2 */
6153306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word22 >>= 8;
6163306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word12 = word12 | (word1 << 24); /* b5 b4 b3 b2 */
6173306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word22 = word22 | (word2 << 24);
6183306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word3 = word12 | word22;
6193306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word12 &= mask;
6203306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word22 &= mask;
6213306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word3 &= (~mask); /* 0x1010101, check last bit */
6223306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word12 >>= 1;
6233306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word12 = word12 + (word22 >> 1);
6243306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word12 += word3;
6253306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                *((uint32*)(pred_block += offset2)) = word12;
6263306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
6273306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word12 = *((uint32*)(prev += 4)); /* read b12 b11 b10 b9 */
6283306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word22 = *((uint32*)(prev + width));
6293306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 >>= 8; /* 0 b8 b7 b6 */
6303306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word2 >>= 8;
6313306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 = word1 | (word12 << 24); /* b9 b8 b7 b6 */
6323306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word2 = word2 | (word22 << 24);
6333306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word3 = word1 | word2;
6343306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 &= mask;
6353306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word2 &= mask;
6363306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word3 &= (~mask); /* 0x1010101, check last bit */
6373306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 >>= 1;
6383306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 = word1 + (word2 >> 1);
6393306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 += word3;
6403306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                *((uint32*)(pred_block += 4)) = word1;
6413306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                prev += offset;
6423306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            }
6433306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            return 1;
6443306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        }
6453306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        else /* rnd1 = 0 */
6463306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        {
6473306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            for (i = B_SIZE; i > 0; i--)
6483306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            {
6493306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word12 = *((uint32*)prev); /* read b4 b3 b2 b1 */
6503306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word22 = *((uint32*)(prev + width));
6513306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
6523306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 = *((uint32*)(prev += 4)); /* read b8 b7 b6 b5 */
6533306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word2 = *((uint32*)(prev + width));
6543306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word12 >>= 8; /* 0 b4 b3 b2 */
6553306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word22 >>= 8;
6563306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word12 = word12 | (word1 << 24); /* b5 b4 b3 b2 */
6573306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word22 = word22 | (word2 << 24);
6583306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word3 = word12 & word22;
6593306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word12 &= mask;
6603306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word22 &= mask;
6613306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word3 &= (~mask); /* 0x1010101, check last bit */
6623306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word12 >>= 1;
6633306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word12 = word12 + (word22 >> 1);
6643306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word12 += word3;
6653306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                *((uint32*)(pred_block += offset2)) = word12;
6663306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
6673306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word12 = *((uint32*)(prev += 4)); /* read b12 b11 b10 b9 */
6683306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word22 = *((uint32*)(prev + width));
6693306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 >>= 8; /* 0 b8 b7 b6 */
6703306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word2 >>= 8;
6713306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 = word1 | (word12 << 24); /* b9 b8 b7 b6 */
6723306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word2 = word2 | (word22 << 24);
6733306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word3 = word1 & word2;
6743306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 &= mask;
6753306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word2 &= mask;
6763306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word3 &= (~mask); /* 0x1010101, check last bit */
6773306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 >>= 1;
6783306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 = word1 + (word2 >> 1);
6793306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 += word3;
6803306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                *((uint32*)(pred_block += 4)) = word1;
6813306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                prev += offset;
6823306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            }
6833306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            return 1;
6843306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        }
6853306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    }
6863306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    else if (tmp == 2)
6873306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    {
6883306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        prev -= 2; /* word-aligned */
6893306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        if (rnd1 == 1)
6903306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        {
6913306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            for (i = B_SIZE; i > 0; i--)
6923306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            {
6933306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word12 = *((uint32*)prev); /* read b4 b3 b2 b1 */
6943306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word22 = *((uint32*)(prev + width));
6953306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
6963306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 = *((uint32*)(prev += 4)); /* read b8 b7 b6 b5 */
6973306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word2 = *((uint32*)(prev + width));
6983306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word12 >>= 16; /* 0 0 b4 b3 */
6993306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word22 >>= 16;
7003306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word12 = word12 | (word1 << 16); /* b6 b5 b4 b3 */
7013306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word22 = word22 | (word2 << 16);
7023306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word3 = word12 | word22;
7033306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word12 &= mask;
7043306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word22 &= mask;
7053306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word3 &= (~mask); /* 0x1010101, check last bit */
7063306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word12 >>= 1;
7073306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word12 = word12 + (word22 >> 1);
7083306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word12 += word3;
7093306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                *((uint32*)(pred_block += offset2)) = word12;
7103306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
7113306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word12 = *((uint32*)(prev += 4)); /* read b12 b11 b10 b9 */
7123306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word22 = *((uint32*)(prev + width));
7133306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 >>= 16; /* 0 0 b8 b7 */
7143306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word2 >>= 16;
7153306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 = word1 | (word12 << 16); /* b10 b9 b8 b7 */
7163306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word2 = word2 | (word22 << 16);
7173306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word3 = word1 | word2;
7183306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 &= mask;
7193306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word2 &= mask;
7203306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word3 &= (~mask); /* 0x1010101, check last bit */
7213306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 >>= 1;
7223306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 = word1 + (word2 >> 1);
7233306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 += word3;
7243306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                *((uint32*)(pred_block += 4)) = word1;
7253306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                prev += offset;
7263306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            }
7273306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            return 1;
7283306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        }
7293306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        else /* rnd1 = 0 */
7303306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        {
7313306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            for (i = B_SIZE; i > 0; i--)
7323306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            {
7333306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word12 = *((uint32*)prev); /* read b4 b3 b2 b1 */
7343306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word22 = *((uint32*)(prev + width));
7353306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
7363306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 = *((uint32*)(prev += 4)); /* read b8 b7 b6 b5 */
7373306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word2 = *((uint32*)(prev + width));
7383306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word12 >>= 16; /* 0 0 b4 b3 */
7393306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word22 >>= 16;
7403306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word12 = word12 | (word1 << 16); /* b6 b5 b4 b3 */
7413306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word22 = word22 | (word2 << 16);
7423306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word3 = word12 & word22;
7433306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word12 &= mask;
7443306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word22 &= mask;
7453306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word3 &= (~mask); /* 0x1010101, check last bit */
7463306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word12 >>= 1;
7473306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word12 = word12 + (word22 >> 1);
7483306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word12 += word3;
7493306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                *((uint32*)(pred_block += offset2)) = word12;
7503306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
7513306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word12 = *((uint32*)(prev += 4)); /* read b12 b11 b10 b9 */
7523306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word22 = *((uint32*)(prev + width));
7533306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 >>= 16; /* 0 0 b8 b7 */
7543306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word2 >>= 16;
7553306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 = word1 | (word12 << 16); /* b10 b9 b8 b7 */
7563306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word2 = word2 | (word22 << 16);
7573306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word3 = word1 & word2;
7583306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 &= mask;
7593306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word2 &= mask;
7603306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word3 &= (~mask); /* 0x1010101, check last bit */
7613306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 >>= 1;
7623306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 = word1 + (word2 >> 1);
7633306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 += word3;
7643306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                *((uint32*)(pred_block += 4)) = word1;
7653306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                prev += offset;
7663306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            }
7673306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
7683306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            return 1;
7693306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        }
7703306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    }
7713306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    else /* tmp == 3 */
7723306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    {
7733306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        prev -= 3; /* word-aligned */
7743306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        if (rnd1 == 1)
7753306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        {
7763306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            for (i = B_SIZE; i > 0; i--)
7773306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            {
7783306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word12 = *((uint32*)prev); /* read b4 b3 b2 b1 */
7793306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word22 = *((uint32*)(prev + width));
7803306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
7813306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 = *((uint32*)(prev += 4)); /* read b8 b7 b6 b5 */
7823306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word2 = *((uint32*)(prev + width));
7833306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word12 >>= 24; /* 0 0 0 b4 */
7843306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word22 >>= 24;
7853306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word12 = word12 | (word1 << 8); /* b7 b6 b5 b4 */
7863306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word22 = word22 | (word2 << 8);
7873306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word3 = word12 | word22;
7883306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word12 &= mask;
7893306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word22 &= mask;
7903306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word3 &= (~mask); /* 0x1010101, check last bit */
7913306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word12 >>= 1;
7923306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word12 = word12 + (word22 >> 1);
7933306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word12 += word3;
7943306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                *((uint32*)(pred_block += offset2)) = word12;
7953306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
7963306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word12 = *((uint32*)(prev += 4)); /* read b12 b11 b10 b9 */
7973306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word22 = *((uint32*)(prev + width));
7983306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 >>= 24; /* 0 0 0 b8 */
7993306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word2 >>= 24;
8003306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 = word1 | (word12 << 8); /* b11 b10 b9 b8 */
8013306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word2 = word2 | (word22 << 8);
8023306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word3 = word1 | word2;
8033306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 &= mask;
8043306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word2 &= mask;
8053306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word3 &= (~mask); /* 0x1010101, check last bit */
8063306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 >>= 1;
8073306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 = word1 + (word2 >> 1);
8083306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 += word3;
8093306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                *((uint32*)(pred_block += 4)) = word1;
8103306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                prev += offset;
8113306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            }
8123306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            return 1;
8133306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        }
8143306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        else /* rnd1 = 0 */
8153306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        {
8163306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            for (i = B_SIZE; i > 0; i--)
8173306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            {
8183306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word12 = *((uint32*)prev); /* read b4 b3 b2 b1 */
8193306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word22 = *((uint32*)(prev + width));
8203306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
8213306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 = *((uint32*)(prev += 4)); /* read b8 b7 b6 b5 */
8223306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word2 = *((uint32*)(prev + width));
8233306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word12 >>= 24; /* 0 0 0 b4 */
8243306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word22 >>= 24;
8253306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word12 = word12 | (word1 << 8); /* b7 b6 b5 b4 */
8263306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word22 = word22 | (word2 << 8);
8273306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word3 = word12 & word22;
8283306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word12 &= mask;
8293306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word22 &= mask;
8303306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word3 &= (~mask); /* 0x1010101, check last bit */
8313306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word12 >>= 1;
8323306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word12 = word12 + (word22 >> 1);
8333306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word12 += word3;
8343306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                *((uint32*)(pred_block += offset2)) = word12;
8353306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
8363306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word12 = *((uint32*)(prev += 4)); /* read b12 b11 b10 b9 */
8373306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word22 = *((uint32*)(prev + width));
8383306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 >>= 24; /* 0 0 0 b8 */
8393306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word2 >>= 24;
8403306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 = word1 | (word12 << 8); /* b11 b10 b9 b8 */
8413306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word2 = word2 | (word22 << 8);
8423306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word3 = word1 & word2;
8433306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 &= mask;
8443306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word2 &= mask;
8453306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word3 &= (~mask); /* 0x1010101, check last bit */
8463306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 >>= 1;
8473306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 = word1 + (word2 >> 1);
8483306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                word1 += word3;
8493306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                *((uint32*)(pred_block += 4)) = word1;
8503306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong                prev += offset;
8513306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            }
8523306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            return 1;
8533306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        } /* rnd */
8543306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    } /* tmp */
8553306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong}
8563306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
8573306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong/**********************************************************************************/
8583306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dongint GetPredAdvancedBy1x1(
8593306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    uint8 *prev,        /* i */
8603306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    uint8 *pred_block,      /* i */
8613306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    int width,      /* i */
8623306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    int pred_width_rnd /* i */
8633306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong)
8643306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong{
8653306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    uint    i;      /* loop variable */
8663306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    int offset, offset2;
8673306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    uint32  x1, x2, x1m, x2m, y1, y2, y1m, y2m; /* new way */
8683306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    int tmp;
8693306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    int rnd1, rnd2;
8703306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    uint32 mask;
8713306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
8723306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    /* initialize offset to adjust pixel counter */
8733306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    /*    the next row; full-pel resolution      */
8743306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    offset = width - B_SIZE; /* offset for prev */
8753306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    offset2 = (pred_width_rnd >> 1) - 8; /* offset for pred_block */
8763306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
8773306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    rnd1 = pred_width_rnd & 1;
8783306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
8793306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    rnd2 = rnd1 + 1;
8803306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    rnd2 |= (rnd2 << 8);
8813306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    rnd2 |= (rnd2 << 16);
8823306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
8833306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    mask = 0x3F;
8843306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    mask |= (mask << 8);
8853306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    mask |= (mask << 16); /* 0x3f3f3f3f */
8863306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
887377b2ec9a2885f9b6405b07ba900a9e3f4349c38Kévin PETIT    tmp = (uintptr_t)prev & 3;
8883306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
8893306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    pred_block -= 4; /* preset */
8903306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
8913306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    if (tmp == 0) /* word-aligned */
8923306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    {
8933306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        for (i = B_SIZE; i > 0; i--)
8943306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        {
8953306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            x1 = *((uint32*)prev); /* load a3 a2 a1 a0 */
8963306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            x2 = *((uint32*)(prev + width)); /* load b3 b2 b1 b0, another line */
8973306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y1 = *((uint32*)(prev += 4)); /* a7 a6 a5 a4 */
8983306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y2 = *((uint32*)(prev + width)); /* b7 b6 b5 b4 */
8993306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
9003306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            x1m = (x1 >> 2) & mask; /* zero out last 2 bits */
9013306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            x2m = (x2 >> 2) & mask;
9023306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            x1 = x1 ^(x1m << 2);
9033306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            x2 = x2 ^(x2m << 2);
9043306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            x1m += x2m;
9053306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            x1 += x2;
9063306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
9073306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            /* x2m, x2 free */
9083306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y1m = (y1 >> 2) & mask; /* zero out last 2 bits */
9093306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y2m = (y2 >> 2) & mask;
9103306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y1 = y1 ^(y1m << 2);
9113306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y2 = y2 ^(y2m << 2);
9123306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y1m += y2m;
9133306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y1 += y2;
9143306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
9153306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            /* y2m, y2 free */
9163306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            /* x2m, x2 free */
9173306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            x2 = *((uint32*)(prev += 4)); /* a11 a10 a9 a8 */
9183306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y2 = *((uint32*)(prev + width)); /* b11 b10 b9 b8 */
9193306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            x2m = (x2 >> 2) & mask;
9203306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y2m = (y2 >> 2) & mask;
9213306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            x2 = x2 ^(x2m << 2);
9223306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y2 = y2 ^(y2m << 2);
9233306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            x2m += y2m;
9243306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            x2 += y2;
9253306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            /* y2m, y2 free */
9263306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
9273306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            /* now operate on x1m, x1, y1m, y1, x2m, x2 */
9283306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            /* x1m = a3+b3, a2+b2, a1+b1, a0+b0 */
9293306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            /* y1m = a7+b7, a6+b6, a5+b5, a4+b4 */
9303306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            /* x2m = a11+b11, a10+b10, a9+b9, a8+b8 */
9313306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            /* x1, y1, x2 */
9323306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
9333306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y2m = x1m >> 8;
9343306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y2 = x1 >> 8;
9353306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y2m |= (y1m << 24);  /* a4+b4, a3+b3, a2+b2, a1+b1 */
9363306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y2 |= (y1 << 24);
9373306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            x1m += y2m;  /* a3+b3+a4+b4, ....., a0+b0+a1+b1 */
9383306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            x1 += y2;
9393306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            x1 += rnd2;
9403306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            x1 &= (mask << 2);
9413306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            x1m += (x1 >> 2);
9423306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            *((uint32*)(pred_block += 4)) = x1m; /* save x1m */
9433306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
9443306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y2m = y1m >> 8;
9453306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y2 = y1 >> 8;
9463306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y2m |= (x2m << 24); /* a8+b8, a7+b7, a6+b6, a5+b5 */
9473306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y2 |= (x2 << 24);
9483306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y1m += y2m;  /* a7+b7+a8+b8, ....., a4+b4+a5+b5 */
9493306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y1 += y2;
9503306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y1 += rnd2;
9513306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y1 &= (mask << 2);
9523306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y1m += (y1 >> 2);
9533306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            *((uint32*)(pred_block += 4)) = y1m; /* save y1m */
9543306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
9553306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            pred_block += offset2;
9563306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            prev += offset;
9573306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        }
9583306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
9593306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        return 1;
9603306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    }
9613306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    else if (tmp == 1)
9623306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    {
9633306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        prev--; /* to word-aligned */
9643306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        for (i = B_SIZE; i > 0; i--)
9653306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        {
9663306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            x1 = *((uint32*)prev); /* load a3 a2 a1 a0 */
9673306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            x2 = *((uint32*)(prev + width)); /* load b3 b2 b1 b0, another line */
9683306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y1 = *((uint32*)(prev += 4)); /* a7 a6 a5 a4 */
9693306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y2 = *((uint32*)(prev + width)); /* b7 b6 b5 b4 */
9703306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
9713306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            x1m = (x1 >> 2) & mask; /* zero out last 2 bits */
9723306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            x2m = (x2 >> 2) & mask;
9733306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            x1 = x1 ^(x1m << 2);
9743306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            x2 = x2 ^(x2m << 2);
9753306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            x1m += x2m;
9763306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            x1 += x2;
9773306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
9783306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            /* x2m, x2 free */
9793306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y1m = (y1 >> 2) & mask; /* zero out last 2 bits */
9803306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y2m = (y2 >> 2) & mask;
9813306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y1 = y1 ^(y1m << 2);
9823306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y2 = y2 ^(y2m << 2);
9833306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y1m += y2m;
9843306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y1 += y2;
9853306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
9863306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            /* y2m, y2 free */
9873306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            /* x2m, x2 free */
9883306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            x2 = *((uint32*)(prev += 4)); /* a11 a10 a9 a8 */
9893306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y2 = *((uint32*)(prev + width)); /* b11 b10 b9 b8 */
9903306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            x2m = (x2 >> 2) & mask;
9913306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y2m = (y2 >> 2) & mask;
9923306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            x2 = x2 ^(x2m << 2);
9933306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y2 = y2 ^(y2m << 2);
9943306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            x2m += y2m;
9953306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            x2 += y2;
9963306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            /* y2m, y2 free */
9973306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
9983306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            /* now operate on x1m, x1, y1m, y1, x2m, x2 */
9993306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            /* x1m = a3+b3, a2+b2, a1+b1, a0+b0 */
10003306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            /* y1m = a7+b7, a6+b6, a5+b5, a4+b4 */
10013306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            /* x2m = a11+b11, a10+b10, a9+b9, a8+b8 */
10023306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            /* x1, y1, x2 */
10033306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
10043306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            x1m >>= 8 ;
10053306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            x1 >>= 8;
10063306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            x1m |= (y1m << 24);  /* a4+b4, a3+b3, a2+b2, a1+b1 */
10073306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            x1 |= (y1 << 24);
10083306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y2m = (y1m << 16);
10093306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y2 = (y1 << 16);
10103306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y2m |= (x1m >> 8); /* a5+b5, a4+b4, a3+b3, a2+b2 */
10113306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y2 |= (x1 >> 8);
10123306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            x1 += rnd2;
10133306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            x1m += y2m;  /* a4+b4+a5+b5, ....., a1+b1+a2+b2 */
10143306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            x1 += y2;
10153306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            x1 &= (mask << 2);
10163306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            x1m += (x1 >> 2);
10173306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            *((uint32*)(pred_block += 4)) = x1m; /* save x1m */
10183306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
10193306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y1m >>= 8;
10203306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y1 >>= 8;
10213306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y1m |= (x2m << 24); /* a8+b8, a7+b7, a6+b6, a5+b5 */
10223306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y1 |= (x2 << 24);
10233306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y2m = (x2m << 16);
10243306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y2 = (x2 << 16);
10253306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y2m |= (y1m >> 8); /*  a9+b9, a8+b8, a7+b7, a6+b6,*/
10263306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y2 |= (y1 >> 8);
10273306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y1 += rnd2;
10283306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y1m += y2m;  /* a8+b8+a9+b9, ....., a5+b5+a6+b6 */
10293306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y1 += y2;
10303306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y1 &= (mask << 2);
10313306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y1m += (y1 >> 2);
10323306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            *((uint32*)(pred_block += 4)) = y1m; /* save y1m */
10333306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
10343306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            pred_block += offset2;
10353306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            prev += offset;
10363306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        }
10373306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        return 1;
10383306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    }
10393306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    else if (tmp == 2)
10403306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    {
10413306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        prev -= 2; /* to word-aligned */
10423306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        for (i = B_SIZE; i > 0; i--)
10433306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        {
10443306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            x1 = *((uint32*)prev); /* load a3 a2 a1 a0 */
10453306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            x2 = *((uint32*)(prev + width)); /* load b3 b2 b1 b0, another line */
10463306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y1 = *((uint32*)(prev += 4)); /* a7 a6 a5 a4 */
10473306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y2 = *((uint32*)(prev + width)); /* b7 b6 b5 b4 */
10483306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
10493306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            x1m = (x1 >> 2) & mask; /* zero out last 2 bits */
10503306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            x2m = (x2 >> 2) & mask;
10513306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            x1 = x1 ^(x1m << 2);
10523306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            x2 = x2 ^(x2m << 2);
10533306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            x1m += x2m;
10543306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            x1 += x2;
10553306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
10563306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            /* x2m, x2 free */
10573306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y1m = (y1 >> 2) & mask; /* zero out last 2 bits */
10583306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y2m = (y2 >> 2) & mask;
10593306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y1 = y1 ^(y1m << 2);
10603306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y2 = y2 ^(y2m << 2);
10613306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y1m += y2m;
10623306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y1 += y2;
10633306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
10643306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            /* y2m, y2 free */
10653306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            /* x2m, x2 free */
10663306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            x2 = *((uint32*)(prev += 4)); /* a11 a10 a9 a8 */
10673306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y2 = *((uint32*)(prev + width)); /* b11 b10 b9 b8 */
10683306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            x2m = (x2 >> 2) & mask;
10693306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y2m = (y2 >> 2) & mask;
10703306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            x2 = x2 ^(x2m << 2);
10713306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y2 = y2 ^(y2m << 2);
10723306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            x2m += y2m;
10733306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            x2 += y2;
10743306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            /* y2m, y2 free */
10753306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
10763306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            /* now operate on x1m, x1, y1m, y1, x2m, x2 */
10773306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            /* x1m = a3+b3, a2+b2, a1+b1, a0+b0 */
10783306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            /* y1m = a7+b7, a6+b6, a5+b5, a4+b4 */
10793306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            /* x2m = a11+b11, a10+b10, a9+b9, a8+b8 */
10803306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            /* x1, y1, x2 */
10813306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
10823306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            x1m >>= 16 ;
10833306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            x1 >>= 16;
10843306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            x1m |= (y1m << 16);  /* a5+b5, a4+b4, a3+b3, a2+b2 */
10853306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            x1 |= (y1 << 16);
10863306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y2m = (y1m << 8);
10873306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y2 = (y1 << 8);
10883306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y2m |= (x1m >> 8); /* a6+b6, a5+b5, a4+b4, a3+b3 */
10893306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y2 |= (x1 >> 8);
10903306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            x1 += rnd2;
10913306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            x1m += y2m;  /* a5+b5+a6+b6, ....., a2+b2+a3+b3 */
10923306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            x1 += y2;
10933306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            x1 &= (mask << 2);
10943306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            x1m += (x1 >> 2);
10953306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            *((uint32*)(pred_block += 4)) = x1m; /* save x1m */
10963306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
10973306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y1m >>= 16;
10983306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y1 >>= 16;
10993306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y1m |= (x2m << 16); /* a9+b9, a8+b8, a7+b7, a6+b6 */
11003306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y1 |= (x2 << 16);
11013306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y2m = (x2m << 8);
11023306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y2 = (x2 << 8);
11033306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y2m |= (y1m >> 8); /*  a10+b10, a9+b9, a8+b8, a7+b7,*/
11043306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y2 |= (y1 >> 8);
11053306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y1 += rnd2;
11063306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y1m += y2m;  /* a9+b9+a10+b10, ....., a6+b6+a7+b7 */
11073306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y1 += y2;
11083306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y1 &= (mask << 2);
11093306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y1m += (y1 >> 2);
11103306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            *((uint32*)(pred_block += 4)) = y1m; /* save y1m */
11113306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
11123306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            pred_block += offset2;
11133306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            prev += offset;
11143306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        }
11153306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        return 1;
11163306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    }
11173306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    else /* tmp == 3 */
11183306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    {
11193306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        prev -= 3; /* to word-aligned */
11203306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        for (i = B_SIZE; i > 0; i--)
11213306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        {
11223306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            x1 = *((uint32*)prev); /* load a3 a2 a1 a0 */
11233306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            x2 = *((uint32*)(prev + width)); /* load b3 b2 b1 b0, another line */
11243306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y1 = *((uint32*)(prev += 4)); /* a7 a6 a5 a4 */
11253306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y2 = *((uint32*)(prev + width)); /* b7 b6 b5 b4 */
11263306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
11273306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            x1m = (x1 >> 2) & mask; /* zero out last 2 bits */
11283306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            x2m = (x2 >> 2) & mask;
11293306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            x1 = x1 ^(x1m << 2);
11303306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            x2 = x2 ^(x2m << 2);
11313306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            x1m += x2m;
11323306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            x1 += x2;
11333306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
11343306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            /* x2m, x2 free */
11353306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y1m = (y1 >> 2) & mask; /* zero out last 2 bits */
11363306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y2m = (y2 >> 2) & mask;
11373306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y1 = y1 ^(y1m << 2);
11383306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y2 = y2 ^(y2m << 2);
11393306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y1m += y2m;
11403306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y1 += y2;
11413306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
11423306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            /* y2m, y2 free */
11433306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            /* x2m, x2 free */
11443306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            x2 = *((uint32*)(prev += 4)); /* a11 a10 a9 a8 */
11453306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y2 = *((uint32*)(prev + width)); /* b11 b10 b9 b8 */
11463306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            x2m = (x2 >> 2) & mask;
11473306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y2m = (y2 >> 2) & mask;
11483306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            x2 = x2 ^(x2m << 2);
11493306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y2 = y2 ^(y2m << 2);
11503306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            x2m += y2m;
11513306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            x2 += y2;
11523306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            /* y2m, y2 free */
11533306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
11543306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            /* now operate on x1m, x1, y1m, y1, x2m, x2 */
11553306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            /* x1m = a3+b3, a2+b2, a1+b1, a0+b0 */
11563306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            /* y1m = a7+b7, a6+b6, a5+b5, a4+b4 */
11573306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            /* x2m = a11+b11, a10+b10, a9+b9, a8+b8 */
11583306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            /* x1, y1, x2 */
11593306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
11603306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            x1m >>= 24 ;
11613306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            x1 >>= 24;
11623306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            x1m |= (y1m << 8);  /* a6+b6, a5+b5, a4+b4, a3+b3 */
11633306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            x1 |= (y1 << 8);
11643306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
11653306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            x1m += y1m;  /* a6+b6+a7+b7, ....., a3+b3+a4+b4 */
11663306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            x1 += y1;
11673306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            x1 += rnd2;
11683306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            x1 &= (mask << 2);
11693306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            x1m += (x1 >> 2);
11703306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            *((uint32*)(pred_block += 4)) = x1m; /* save x1m */
11713306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
11723306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y1m >>= 24;
11733306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y1 >>= 24;
11743306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y1m |= (x2m << 8); /* a10+b10, a9+b9, a8+b8, a7+b7 */
11753306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y1 |= (x2 << 8);
11763306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y1m += x2m;  /* a10+b10+a11+b11, ....., a7+b7+a8+b8 */
11773306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y1 += x2;
11783306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y1 += rnd2;
11793306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y1 &= (mask << 2);
11803306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            y1m += (y1 >> 2);
11813306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            *((uint32*)(pred_block += 4)) = y1m; /* save y1m */
11823306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
11833306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            pred_block += offset2;
11843306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong            prev += offset;
11853306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        }
11863306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong        return 1;
11873306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong    }
11883306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong}
11893306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
11903306cfee3bf38ab207a0504e49c2d492bb73ffbfJames Dong
1191