motion_comp.cpp revision 59f566c4ec3dfc097ad8163523e522280b27e5c3
1/* ------------------------------------------------------------------
2 * Copyright (C) 1998-2009 PacketVideo
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
13 * express or implied.
14 * See the License for the specific language governing permissions
15 * and limitations under the License.
16 * -------------------------------------------------------------------
17 */
#include "mp4lib_int.h"
#include "mp4enc_lib.h"

#include <stdint.h> /* uintptr_t, used for pointer alignment tests */
20
21//const static Int roundtab4[] = {0,1,1,1};
22//const static Int roundtab8[] = {0,0,1,1,1,1,1,2};
23//const static Int roundtab12[] = {0,0,0,1,1,1,1,1,1,1,2,2};
24const static Int roundtab16[] = {0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2};
25
/* Prediction-direction identifiers.
 * NOTE(review): none of these is referenced in the portion of the file
 * reviewed here; confirm they are still needed before removing them. */
#define FORWARD_MODE    1
#define BACKWARD_MODE   2
#define BIDIRECTION_MODE    3
#define DIRECT_MODE         4
30
31#ifdef __cplusplus
32extern "C"
33{
34#endif
35    /*Function Prototype */
36    /* no-edge padding */
37    Int EncGetPredOutside(Int xpos, Int ypos, UChar *c_prev, UChar *rec,
38    Int width, Int height, Int rnd1);
39
40    void Copy_MB_from_Vop(UChar *comp, Int yChan[][NCOEFF_BLOCK], Int width);
41    void Copy_B_from_Vop(UChar *comp, Int cChan[], Int width);
42    void Copy_MB_into_Vop(UChar *comp, Int yChan[][NCOEFF_BLOCK], Int width);
43    void Copy_B_into_Vop(UChar *comp, Int cChan[], Int width);
44    void get_MB(UChar *c_prev, UChar *c_prev_u  , UChar *c_prev_v,
45                Short mb[6][64], Int lx, Int lx_uv);
46
47    Int GetPredAdvBy0x0(
48        UChar *c_prev,      /* i */
49        UChar *pred_block,      /* i */
50        Int lx,     /* i */
51        Int rnd1 /* i */
52    );
53
54    Int GetPredAdvBy0x1(
55        UChar *c_prev,      /* i */
56        UChar *pred_block,      /* i */
57        Int lx,     /* i */
58        Int rnd1 /* i */
59    );
60
61    Int GetPredAdvBy1x0(
62        UChar *c_prev,      /* i */
63        UChar *pred_block,      /* i */
64        Int lx,     /* i */
65        Int rnd1 /* i */
66    );
67
68    Int GetPredAdvBy1x1(
69        UChar *c_prev,      /* i */
70        UChar *pred_block,      /* i */
71        Int lx,     /* i */
72        Int rnd1 /* i */
73    );
74
75    static Int(*const GetPredAdvBTable[2][2])(UChar*, UChar*, Int, Int) =
76    {
77        {&GetPredAdvBy0x0, &GetPredAdvBy0x1},
78        {&GetPredAdvBy1x0, &GetPredAdvBy1x1}
79    };
80
81
82#ifdef __cplusplus
83}
84#endif
85
86
87/* ======================================================================== */
88/*  Function : getMotionCompensatedMB( )                                    */
89/*  Date     : 4/17/2001                                                    */
/*  Purpose  : Get the motion-compensated prediction of the macroblock      */
/*              into video->predictedMB; the residual is not computed here  */
92/*              modified from MBMotionComp() function in the decoder        */
93/*  In/out   :                                                              */
94/*  Return   :                                                              */
95/*  Modified :                                                              */
96/* ======================================================================== */
97
98void getMotionCompensatedMB(VideoEncData *video, Int ind_x, Int ind_y, Int offset)
99{
100    Vop *prevVop = video->forwardRefVop; //reference frame
101    Vop *currVop = video->currVop;
102    Int mbnum = video->mbnum;       //mb index
103    MOT *mot = video->mot[mbnum];
104    Int ypos, xpos;
105    UChar *c_prev, *cu_prev, *cv_prev;
106    UChar *c_rec, *cu_rec, *cv_rec;
107    Int height, pitch, pitch_uv, height_uv;
108    Int mode = video->headerInfo.Mode[mbnum];  /* get mode */
109    Int dx, dy;
110    Int xpred, ypred;
111    Int xsum, ysum;
112    Int round1;
113
114    OSCL_UNUSED_ARG(offset);
115
116    round1 = (Int)(1 - video->currVop->roundingType);
117
118    pitch  = currVop->pitch;
119    height = currVop->height;
120    pitch_uv  = pitch >> 1;
121    height_uv = height >> 1;
122
123    ypos = ind_y << 4 ;
124    xpos = ind_x << 4 ;
125
126    c_rec = video->predictedMB;
127    cu_rec = video->predictedMB + 256;
128    cv_rec = video->predictedMB + 264;
129
130    if (mode == MODE_INTER || mode == MODE_INTER_Q)
131    {
132        /* Motion vector in x direction       */
133        dx = mot[0].x;
134        dy = mot[0].y;
135
136        c_prev  = prevVop->yChan;
137
138        xpred = (xpos << 1) + dx ;
139        ypred = (ypos << 1) + dy ;
140
141        /* Call function that performs luminance prediction */
142        EncPrediction_INTER(xpred, ypred, c_prev, c_rec,
143                            pitch, round1);
144
145        if ((dx & 3) == 0)  dx = dx >> 1;
146        else        dx = (dx >> 1) | 1;
147
148        if ((dy & 3) == 0)      dy = dy >> 1;
149        else        dy = (dy >> 1) | 1;
150
151        xpred = xpos + dx;
152        ypred = ypos + dy;
153
154        cu_prev = prevVop->uChan;
155        cv_prev = prevVop->vChan;
156
157        EncPrediction_Chrom(xpred, ypred, cu_prev, cv_prev, cu_rec, cv_rec,
158                            pitch_uv, (currVop->width) >> 1, height_uv, round1);
159    }
160#ifndef NO_INTER4V
161    else if (mode == MODE_INTER4V)
162    {
163        c_prev  = prevVop->yChan;
164        cu_prev = prevVop->uChan;
165        cv_prev = prevVop->vChan;
166
167        EncPrediction_INTER4V(xpos, ypos, mot, c_prev, c_rec,
168                              pitch, round1);
169
170        xsum = mot[1].x + mot[2].x + mot[3].x + mot[4].x;
171        ysum = mot[1].y + mot[2].y + mot[3].y + mot[4].y;
172
173        dx = PV_SIGN(xsum) * (roundtab16[(PV_ABS(xsum)) & 0xF] +
174                              (((PV_ABS(xsum)) >> 4) << 1));
175        dy = PV_SIGN(ysum) * (roundtab16[(PV_ABS(ysum)) & 0xF] +
176                              (((PV_ABS(ysum)) >> 4) << 1));
177
178        ypred = ypos + dy;
179        xpred = xpos + dx;
180
181        EncPrediction_Chrom(xpred, ypred, cu_prev, cv_prev, cu_rec, cv_rec,
182                            pitch_uv, (currVop->width) >> 1, height_uv, round1);
183    }
184#endif
185    else
186    {
187        ;//printf("Error, MODE_SKIPPED is not decided yet!\n");
188    }
189
190    return ;
191}
192
193/***************************************************************************
194    Function:   EncPrediction_INTER
195    Date:       04/17/2001
196    Purpose:    Get predicted area for luminance and compensate with the residue.
197                Modified from luminance_pred_mode_inter() in decoder.
198***************************************************************************/
199
200void EncPrediction_INTER(
201    Int xpred,          /* i */
202    Int ypred,          /* i */
203    UChar *c_prev,          /* i */
204    UChar *c_rec,       /* i */
205    Int lx,         /* i */
206    Int round1          /* i */
207)
208{
209    c_prev += (xpred >> 1) + ((ypred >> 1) * lx);
210
211    GetPredAdvBTable[ypred&1][xpred&1](c_prev, c_rec, lx, round1);
212
213    c_prev += B_SIZE;
214    c_rec += B_SIZE;
215
216    GetPredAdvBTable[ypred&1][xpred&1](c_prev, c_rec, lx, round1);
217
218    c_prev += (lx << 3) - B_SIZE;
219    c_rec += (16 << 3) - B_SIZE; /* padding */
220
221    GetPredAdvBTable[ypred&1][xpred&1](c_prev, c_rec, lx, round1);
222
223    c_prev += B_SIZE;
224    c_rec += B_SIZE;
225
226    GetPredAdvBTable[ypred&1][xpred&1](c_prev, c_rec, lx, round1);
227
228    return;
229}
230
231#ifndef NO_INTER4V
232/***************************************************************************
233    Function:   EncPrediction_INTER4V
234    Date:       04/17/2001
235    Purpose:    Get predicted area for luminance and compensate with the residue.
236                Modified from luminance_pred_mode_inter4v() in decoder.
237***************************************************************************/
238
239void EncPrediction_INTER4V(
240    Int xpos,           /* i */
241    Int ypos,           /* i */
242    MOT *mot,           /* i */
243    UChar *c_prev,          /* i */
244    UChar *c_rec,           /* i */
245    Int lx,         /* i */
246    Int round1          /* i */
247)
248{
249    Int ypred, xpred;
250
251    xpred = (Int)((xpos << 1) + mot[1].x);
252    ypred = (Int)((ypos << 1) + mot[1].y);
253
254    GetPredAdvBTable[ypred&1][xpred&1](c_prev + (xpred >> 1) + ((ypred >> 1)*lx),
255                                       c_rec, lx, round1);
256
257    c_rec += B_SIZE;
258
259    xpred = (Int)(((xpos + B_SIZE) << 1) + mot[2].x);
260    ypred = (Int)((ypos << 1) + mot[2].y);
261
262    GetPredAdvBTable[ypred&1][xpred&1](c_prev + (xpred >> 1) + ((ypred >> 1)*lx),
263                                       c_rec, lx, round1);
264
265    c_rec += (16 << 3) - B_SIZE; /* padding */
266
267    xpred = (Int)((xpos << 1) + mot[3].x);
268    ypred = (Int)(((ypos + B_SIZE) << 1) + mot[3].y);
269
270    GetPredAdvBTable[ypred&1][xpred&1](c_prev + (xpred >> 1) + ((ypred >> 1)*lx),
271                                       c_rec, lx, round1);
272
273    c_rec += B_SIZE;
274
275    xpred = (Int)(((xpos + B_SIZE) << 1) + mot[4].x);
276    ypred = (Int)(((ypos + B_SIZE) << 1) + mot[4].y);
277
278    GetPredAdvBTable[ypred&1][xpred&1](c_prev + (xpred >> 1) + ((ypred >> 1)*lx),
279                                       c_rec, lx, round1);
280
281    return;
282}
283#endif /* NO_INTER4V */
284
285/***************************************************************************
286    Function:   EncPrediction_Chrom
287    Date:       04/17/2001
288    Purpose:    Get predicted area for chrominance and compensate with the residue.
289                Modified from chrominance_pred() in decoder.
290***************************************************************************/
291
292void EncPrediction_Chrom(
293    Int xpred,          /* i */
294    Int ypred,          /* i */
295    UChar *cu_prev,         /* i */
296    UChar *cv_prev,         /* i */
297    UChar *cu_rec,
298    UChar *cv_rec,
299    Int lx,
300    Int width_uv,           /* i */
301    Int height_uv,          /* i */
302    Int round1          /* i */
303)
304{
305    /* check whether the MV points outside the frame */
306    /* Compute prediction for Chrominance b block (block[4]) */
307    if (xpred >= 0 && xpred <= ((width_uv << 1) - (2*B_SIZE)) && ypred >= 0 &&
308            ypred <= ((height_uv << 1) - (2*B_SIZE)))
309    {
310        /*****************************/
311        /* (x,y) is inside the frame */
312        /*****************************/
313
314        /* Compute prediction for Chrominance b (block[4]) */
315        GetPredAdvBTable[ypred&1][xpred&1](cu_prev + (xpred >> 1) + ((ypred >> 1)*lx),
316                                           cu_rec, lx, round1);
317
318        /* Compute prediction for Chrominance r (block[5]) */
319        GetPredAdvBTable[ypred&1][xpred&1](cv_prev + (xpred >> 1) + ((ypred >> 1)*lx),
320                                           cv_rec,  lx, round1);
321    }
322    else
323    {
324        /******************************/
325        /* (x,y) is outside the frame */
326        /******************************/
327
328        /* Compute prediction for Chrominance b (block[4]) */
329        EncGetPredOutside(xpred, ypred,
330                          cu_prev, cu_rec,
331                          width_uv, height_uv, round1);
332
333        /* Compute prediction for Chrominance r (block[5]) */
334        EncGetPredOutside(xpred, ypred,
335                          cv_prev, cv_rec,
336                          width_uv, height_uv, round1);
337    }
338
339    return;
340}
341/***************************************************************************
342    Function:   GetPredAdvancedB
343    Date:       04/17/2001
344    Purpose:    Get predicted area (block) and compensate with the residue.
345                - modified from GetPredAdvancedBAdd in decoder.
    Input/Output:
347    Modified:
348***************************************************************************/
349
350Int GetPredAdvBy0x0(
351    UChar *prev,        /* i */
352    UChar *rec,     /* i */
353    Int lx,     /* i */
354    Int rnd /* i */
355)
356{
357    Int i;      /* loop variable */
358    ULong  pred_word, word1, word2;
359    Int tmp;
360
361    OSCL_UNUSED_ARG(rnd);
362
363    /* initialize offset to adjust pixel counter */
364    /*    the next row; full-pel resolution      */
365
366    tmp = (ULong)prev & 0x3;
367
368    if (tmp == 0)  /* word-aligned */
369    {
370        rec -= 16; /* preset */
371        prev -= lx;
372
373        for (i = 8; i > 0; i--)
374        {
375            *((ULong*)(rec += 16)) = *((ULong*)(prev += lx));
376            *((ULong*)(rec + 4)) = *((ULong*)(prev + 4));
377        }
378        return 1;
379    }
380    else if (tmp == 1) /* first position */
381    {
382        prev--; /* word-aligned */
383        rec -= 16; /* preset */
384        prev -= lx;
385
386        for (i = 8; i > 0; i--)
387        {
388            word1 = *((ULong*)(prev += lx)); /* read 4 bytes, b4 b3 b2 b1 */
389            word2 = *((ULong*)(prev + 4));  /* read 4 bytes, b8 b7 b6 b5 */
390            word1 >>= 8; /* 0 b4 b3 b2 */
391            pred_word = word1 | (word2 << 24);  /* b5 b4 b3 b2 */
392            *((ULong*)(rec += 16)) = pred_word;
393
394            word1 = *((ULong*)(prev + 8)); /* b12 b11 b10 b9 */
395            word2 >>= 8; /* 0 b8 b7 b6 */
396            pred_word = word2 | (word1 << 24); /* b9 b8 b7 b6 */
397            *((ULong*)(rec + 4)) = pred_word;
398        }
399
400        return 1;
401    }
402    else if (tmp == 2) /* second position */
403    {
404        prev -= 2; /* word1-aligned */
405        rec -= 16; /* preset */
406        prev -= lx;
407
408        for (i = 8; i > 0; i--)
409        {
410            word1 = *((ULong*)(prev += lx)); /* read 4 bytes, b4 b3 b2 b1 */
411            word2 = *((ULong*)(prev + 4));  /* read 4 bytes, b8 b7 b6 b5 */
412            word1 >>= 16; /* 0 0 b4 b3 */
413            pred_word = word1 | (word2 << 16);  /* b6 b5 b4 b3 */
414            *((ULong*)(rec += 16)) = pred_word;
415
416            word1 = *((ULong*)(prev + 8)); /* b12 b11 b10 b9 */
417            word2 >>= 16; /* 0 0 b8 b7 */
418            pred_word = word2 | (word1 << 16); /* b10 b9 b8 b7 */
419            *((ULong*)(rec + 4)) = pred_word;
420        }
421
422        return 1;
423    }
424    else /* third position */
425    {
426        prev -= 3; /* word1-aligned */
427        rec -= 16; /* preset */
428        prev -= lx;
429
430        for (i = 8; i > 0; i--)
431        {
432            word1 = *((ULong*)(prev += lx)); /* read 4 bytes, b4 b3 b2 b1 */
433            word2 = *((ULong*)(prev + 4));  /* read 4 bytes, b8 b7 b6 b5 */
434            word1 >>= 24; /* 0 0 0 b4 */
435            pred_word = word1 | (word2 << 8);   /* b7 b6 b5 b4 */
436            *((ULong*)(rec += 16)) = pred_word;
437
438            word1 = *((ULong*)(prev + 8)); /* b12 b11 b10 b9 */
439            word2 >>= 24; /* 0 0 0 b8 */
440            pred_word = word2 | (word1 << 8); /* b11 b10 b9 b8 */
441            *((ULong*)(rec + 4)) = pred_word;
442
443        }
444
445        return 1;
446    }
447}
448/**************************************************************************/
449Int GetPredAdvBy0x1(
450    UChar *prev,        /* i */
451    UChar *rec,     /* i */
452    Int lx,     /* i */
453    Int rnd1 /* i */
454)
455{
456    Int i;      /* loop variable */
457    Int offset;
458    ULong word1, word2, word3, word12;
459    Int tmp;
460    ULong mask;
461
462    /* initialize offset to adjust pixel counter */
463    /*    the next row; full-pel resolution      */
464    offset = lx - B_SIZE; /* offset for prev */
465
466    /* Branch based on pixel location (half-pel or full-pel) for x and y */
467    rec -= 12; /* preset */
468
469    tmp = (ULong)prev & 3;
470    mask = 254;
471    mask |= (mask << 8);
472    mask |= (mask << 16); /* 0xFEFEFEFE */
473
474    if (tmp == 0) /* word-aligned */
475    {
476        if (rnd1 == 1)
477        {
478            for (i = B_SIZE; i > 0; i--)
479            {
480                word1 = *((ULong*)prev); /* b4 b3 b2 b1 */
481                word2 = *((ULong*)(prev += 4)); /* b8 b7 b6 b5 */
482                word12 = (word1 >> 8); /* 0 b4 b3 b2 */
483                word12 |= (word2 << 24); /* b5 b4 b3 b2 */
484                word3 = word1 | word12; // rnd1 = 1; otherwise word3 = word1&word12
485                word1 &= mask;
486                word3 &= (~mask); /* 0x1010101, check last bit */
487                word12 &= mask;
488                word1 >>= 1;
489                word1 = word1 + (word12 >> 1);
490                word1 += word3;
491                *((ULong*)(rec += 12)) = word1; /* write 4 pixels */
492
493                word1 = *((ULong*)(prev += 4)); /* b12 b11 b10 b9 */
494                word12 = (word2 >> 8); /* 0 b8 b7 b6 */
495                word12 |= (word1 << 24); /* b9 b8 b7 b6 */
496                word3 = word2 | word12;
497                word2 &= mask;
498                word3 &= (~mask);  /* 0x1010101, check last bit */
499                word12 &= mask;
500                word2 >>= 1;
501                word2 = word2 + (word12 >> 1);
502                word2 += word3;
503                *((ULong*)(rec += 4)) = word2; /* write 4 pixels */
504
505                prev += offset;
506            }
507            return 1;
508        }
509        else /* rnd1 == 0 */
510        {
511            for (i = B_SIZE; i > 0; i--)
512            {
513                word1 = *((ULong*)prev); /* b4 b3 b2 b1 */
514
515                word2 = *((ULong*)(prev += 4)); /* b8 b7 b6 b5 */
516                word12 = (word1 >> 8); /* 0 b4 b3 b2 */
517                word12 |= (word2 << 24); /* b5 b4 b3 b2 */
518                word3 = word1 & word12; // rnd1 = 1; otherwise word3 = word1&word12
519                word1 &= mask;
520                word3 &= (~mask); /* 0x1010101, check last bit */
521                word12 &= mask;
522                word1 >>= 1;
523                word1 = word1 + (word12 >> 1);
524                word1 += word3;
525                *((ULong*)(rec += 12)) = word1; /* write 4 pixels */
526
527                word1 = *((ULong*)(prev += 4)); /* b12 b11 b10 b9 */
528                word12 = (word2 >> 8); /* 0 b8 b7 b6 */
529                word12 |= (word1 << 24); /* b9 b8 b7 b6 */
530                word3 = word2 & word12;
531                word2 &= mask;
532                word3 &= (~mask);  /* 0x1010101, check last bit */
533                word12 &= mask;
534                word2 >>= 1;
535                word2 = word2 + (word12 >> 1);
536                word2 += word3;
537                *((ULong*)(rec += 4)) = word2; /* write 4 pixels */
538
539                prev += offset;
540            }
541            return 1;
542        } /* rnd1 */
543    }
544    else if (tmp == 1)
545    {
546        prev--; /* word-aligned */
547        if (rnd1 == 1)
548        {
549            for (i = B_SIZE; i > 0; i--)
550            {
551                word1 = *((ULong*)prev); /* b3 b2 b1 b0 */
552                word2 = *((ULong*)(prev += 4)); /* b7 b6 b5 b4 */
553                word12 = (word1 >> 8); /* 0 b3 b2 b1 */
554                word1 >>= 16; /* 0 0 b3 b2 */
555                word12 |= (word2 << 24); /* b4 b3 b2 b1 */
556                word1 |= (word2 << 16); /* b5 b4 b3 b2 */
557                word3 = word1 | word12; // rnd1 = 1; otherwise word3 = word1&word12
558                word1 &= mask;
559                word3 &= (~mask); /* 0x1010101, check last bit */
560                word12 &= mask;
561                word1 >>= 1;
562                word1 = word1 + (word12 >> 1);
563                word1 += word3;
564                *((ULong*)(rec += 12)) = word1; /* write 4 pixels */
565
566                word1 = *((ULong*)(prev += 4)); /* b11 b10 b9 b8 */
567                word12 = (word2 >> 8); /* 0 b7 b6 b5 */
568                word2 >>= 16; /* 0 0 b7 b6 */
569                word12 |= (word1 << 24); /* b8 b7 b6 b5 */
570                word2 |= (word1 << 16); /* b9 b8 b7 b6 */
571                word3 = word2 | word12; // rnd1 = 1; otherwise word3 = word2&word12
572                word2 &= mask;
573                word3 &= (~mask); /* 0x1010101, check last bit */
574                word12 &= mask;
575                word2 >>= 1;
576                word2 = word2 + (word12 >> 1);
577                word2 += word3;
578                *((ULong*)(rec += 4)) = word2; /* write 4 pixels */
579
580                prev += offset;
581            }
582            return 1;
583        }
584        else /* rnd1 = 0 */
585        {
586            for (i = B_SIZE; i > 0; i--)
587            {
588                word1 = *((ULong*)prev); /* b3 b2 b1 b0 */
589
590                word2 = *((ULong*)(prev += 4)); /* b7 b6 b5 b4 */
591                word12 = (word1 >> 8); /* 0 b3 b2 b1 */
592                word1 >>= 16; /* 0 0 b3 b2 */
593                word12 |= (word2 << 24); /* b4 b3 b2 b1 */
594                word1 |= (word2 << 16); /* b5 b4 b3 b2 */
595                word3 = word1 & word12;
596                word1 &= mask;
597                word3 &= (~mask); /* 0x1010101, check last bit */
598                word12 &= mask;
599                word1 >>= 1;
600                word1 = word1 + (word12 >> 1);
601                word1 += word3;
602                *((ULong*)(rec += 12)) = word1; /* write 4 pixels */
603
604                word1 = *((ULong*)(prev += 4)); /* b11 b10 b9 b8 */
605                word12 = (word2 >> 8); /* 0 b7 b6 b5 */
606                word2 >>= 16; /* 0 0 b7 b6 */
607                word12 |= (word1 << 24); /* b8 b7 b6 b5 */
608                word2 |= (word1 << 16); /* b9 b8 b7 b6 */
609                word3 = word2 & word12;
610                word2 &= mask;
611                word3 &= (~mask); /* 0x1010101, check last bit */
612                word12 &= mask;
613                word2 >>= 1;
614                word2 = word2 + (word12 >> 1);
615                word2 += word3;
616                *((ULong*)(rec += 4)) = word2; /* write 4 pixels */
617
618                prev += offset;
619            }
620            return 1;
621        } /* rnd1 */
622    }
623    else if (tmp == 2)
624    {
625        prev -= 2; /* word-aligned */
626        if (rnd1 == 1)
627        {
628            for (i = B_SIZE; i > 0; i--)
629            {
630                word1 = *((ULong*)prev); /* b2 b1 b0 bN1 */
631                word2 = *((ULong*)(prev += 4)); /* b6 b5 b4 b3 */
632                word12 = (word1 >> 16); /* 0 0 b2 b1 */
633                word1 >>= 24; /* 0 0 0 b2 */
634                word12 |= (word2 << 16); /* b4 b3 b2 b1 */
635                word1 |= (word2 << 8); /* b5 b4 b3 b2 */
636                word3 = word1 | word12; // rnd1 = 1; otherwise word3 = word1&word12
637                word1 &= mask;
638                word3 &= (~mask); /* 0x1010101, check last bit */
639                word12 &= mask;
640                word1 >>= 1;
641                word1 = word1 + (word12 >> 1);
642                word1 += word3;
643                *((ULong*)(rec += 12)) = word1; /* write 4 pixels */
644
645                word1 = *((ULong*)(prev += 4)); /* b10 b9 b8 b7 */
646                word12 = (word2 >> 16); /* 0 0 b6 b5 */
647                word2 >>= 24; /* 0 0 0 b6 */
648                word12 |= (word1 << 16); /* b8 b7 b6 b5 */
649                word2 |= (word1 << 8); /* b9 b8 b7 b6 */
650                word3 = word2 | word12; // rnd1 = 1; otherwise word3 = word1&word12
651                word2 &= mask;
652                word3 &= (~mask); /* 0x1010101, check last bit */
653                word12 &= mask;
654                word2 >>= 1;
655                word2 = word2 + (word12 >> 1);
656                word2 += word3;
657                *((ULong*)(rec += 4)) = word2; /* write 4 pixels */
658                prev += offset;
659            }
660            return 1;
661        }
662        else /* rnd1 == 0 */
663        {
664            for (i = B_SIZE; i > 0; i--)
665            {
666                word1 = *((ULong*)prev); /* b2 b1 b0 bN1 */
667                word2 = *((ULong*)(prev += 4)); /* b6 b5 b4 b3 */
668                word12 = (word1 >> 16); /* 0 0 b2 b1 */
669                word1 >>= 24; /* 0 0 0 b2 */
670                word12 |= (word2 << 16); /* b4 b3 b2 b1 */
671                word1 |= (word2 << 8); /* b5 b4 b3 b2 */
672                word3 = word1 & word12; // rnd1 = 1; otherwise word3 = word1&word12
673                word1 &= mask;
674                word3 &= (~mask); /* 0x1010101, check last bit */
675                word12 &= mask;
676                word1 >>= 1;
677                word1 = word1 + (word12 >> 1);
678                word1 += word3;
679                *((ULong*)(rec += 12)) = word1; /* write 4 pixels */
680
681                word1 = *((ULong*)(prev += 4)); /* b10 b9 b8 b7 */
682                word12 = (word2 >> 16); /* 0 0 b6 b5 */
683                word2 >>= 24; /* 0 0 0 b6 */
684                word12 |= (word1 << 16); /* b8 b7 b6 b5 */
685                word2 |= (word1 << 8); /* b9 b8 b7 b6 */
686                word3 = word2 & word12; // rnd1 = 1; otherwise word3 = word1&word12
687                word2 &= mask;
688                word3 &= (~mask); /* 0x1010101, check last bit */
689                word12 &= mask;
690                word2 >>= 1;
691                word2 = word2 + (word12 >> 1);
692                word2 += word3;
693                *((ULong*)(rec += 4)) = word2; /* write 4 pixels */
694                prev += offset;
695            }
696            return 1;
697        }
698    }
699    else /* tmp = 3 */
700    {
701        prev -= 3; /* word-aligned */
702        if (rnd1 == 1)
703        {
704            for (i = B_SIZE; i > 0; i--)
705            {
706                word1 = *((ULong*)prev); /* b1 b0 bN1 bN2 */
707                word2 = *((ULong*)(prev += 4)); /* b5 b4 b3 b2 */
708                word12 = (word1 >> 24); /* 0 0 0 b1 */
709                word12 |= (word2 << 8); /* b4 b3 b2 b1 */
710                word1 = word2;
711                word3 = word1 | word12; // rnd1 = 1; otherwise word3 = word1&word12
712                word1 &= mask;
713                word3 &= (~mask); /* 0x1010101, check last bit */
714                word12 &= mask;
715                word1 >>= 1;
716                word1 = word1 + (word12 >> 1);
717                word1 += word3;
718                *((ULong*)(rec += 12)) = word1; /* write 4 pixels */
719
720                word1 = *((ULong*)(prev += 4)); /* b9 b8 b7 b6 */
721                word12 = (word2 >> 24); /* 0 0 0 b5 */
722                word12 |= (word1 << 8); /* b8 b7 b6 b5 */
723                word2 = word1; /* b9 b8 b7 b6 */
724                word3 = word2 | word12; // rnd1 = 1; otherwise word3 = word1&word12
725                word2 &= mask;
726                word3 &= (~mask); /* 0x1010101, check last bit */
727                word12 &= mask;
728                word2 >>= 1;
729                word2 = word2 + (word12 >> 1);
730                word2 += word3;
731                *((ULong*)(rec += 4)) = word2; /* write 4 pixels */
732                prev += offset;
733            }
734            return 1;
735        }
736        else
737        {
738            for (i = B_SIZE; i > 0; i--)
739            {
740                word1 = *((ULong*)prev); /* b1 b0 bN1 bN2 */
741                word2 = *((ULong*)(prev += 4)); /* b5 b4 b3 b2 */
742                word12 = (word1 >> 24); /* 0 0 0 b1 */
743                word12 |= (word2 << 8); /* b4 b3 b2 b1 */
744                word1 = word2;
745                word3 = word1 & word12; // rnd1 = 1; otherwise word3 = word1&word12
746                word1 &= mask;
747                word3 &= (~mask); /* 0x1010101, check last bit */
748                word12 &= mask;
749                word1 >>= 1;
750                word1 = word1 + (word12 >> 1);
751                word1 += word3;
752                *((ULong*)(rec += 12)) = word1; /* write 4 pixels */
753
754                word1 = *((ULong*)(prev += 4)); /* b9 b8 b7 b6 */
755                word12 = (word2 >> 24); /* 0 0 0 b5 */
756                word12 |= (word1 << 8); /* b8 b7 b6 b5 */
757                word2 = word1; /* b9 b8 b7 b6 */
758                word3 = word2 & word12; // rnd1 = 1; otherwise word3 = word1&word12
759                word2 &= mask;
760                word3 &= (~mask); /* 0x1010101, check last bit */
761                word12 &= mask;
762                word2 >>= 1;
763                word2 = word2 + (word12 >> 1);
764                word2 += word3;
765                *((ULong*)(rec += 4)) = word2; /* write 4 pixels */
766                prev += offset;
767            }
768            return 1;
769        }
770    }
771}
772
773/**************************************************************************/
774Int GetPredAdvBy1x0(
775    UChar *prev,        /* i */
776    UChar *rec,     /* i */
777    Int lx,     /* i */
778    Int rnd1 /* i */
779)
780{
781    Int i;      /* loop variable */
782    Int offset;
783    ULong  word1, word2, word3, word12, word22;
784    Int tmp;
785    ULong mask;
786
787    /* initialize offset to adjust pixel counter */
788    /*    the next row; full-pel resolution      */
789    offset = lx - B_SIZE; /* offset for prev */
790
791    /* Branch based on pixel location (half-pel or full-pel) for x and y */
792    rec -= 12; /* preset */
793
794    tmp = (ULong)prev & 3;
795    mask = 254;
796    mask |= (mask << 8);
797    mask |= (mask << 16); /* 0xFEFEFEFE */
798
799    if (tmp == 0) /* word-aligned */
800    {
801        prev -= 4;
802        if (rnd1 == 1)
803        {
804            for (i = B_SIZE; i > 0; i--)
805            {
806                word1 = *((ULong*)(prev += 4));
807                word2 = *((ULong*)(prev + lx));
808                word3 = word1 | word2; // rnd1 = 1; otherwise word3 = word1&word2
809                word1 &= mask;
810                word3 &= (~mask); /* 0x1010101, check last bit */
811                word2 &= mask;
812                word1 >>= 1;
813                word1 = word1 + (word2 >> 1);
814                word1 += word3;
815                *((ULong*)(rec += 12)) = word1;
816                word1 = *((ULong*)(prev += 4));
817                word2 = *((ULong*)(prev + lx));
818                word3 = word1 | word2; // rnd1 = 1; otherwise word3 = word1&word2
819                word1 &= mask;
820                word3 &= (~mask); /* 0x1010101, check last bit */
821                word2 &= mask;
822                word1 >>= 1;
823                word1 = word1 + (word2 >> 1);
824                word1 += word3;
825                *((ULong*)(rec += 4)) = word1;
826
827                prev += offset;
828            }
829            return 1;
830        }
831        else   /* rnd1 = 0 */
832        {
833            for (i = B_SIZE; i > 0; i--)
834            {
835                word1 = *((ULong*)(prev += 4));
836                word2 = *((ULong*)(prev + lx));
837                word3 = word1 & word2;  /* rnd1 = 0; */
838                word1 &= mask;
839                word3 &= (~mask); /* 0x1010101, check last bit */
840                word2 &= mask;
841                word1 >>= 1;
842                word1 = word1 + (word2 >> 1);
843                word1 += word3;
844                *((ULong*)(rec += 12)) = word1;
845                word1 = *((ULong*)(prev += 4));
846                word2 = *((ULong*)(prev + lx));
847                word3 = word1 & word2;  /* rnd1 = 0; */
848                word1 &= mask;
849                word3 &= (~mask); /* 0x1010101, check last bit */
850                word2 &= mask;
851                word1 >>= 1;
852                word1 = word1 + (word2 >> 1);
853                word1 += word3;
854                *((ULong*)(rec += 4)) = word1;
855
856                prev += offset;
857            }
858            return 1;
859        }
860    }
861    else if (tmp == 1)
862    {
863        prev--; /* word-aligned */
864        if (rnd1 == 1)
865        {
866            for (i = B_SIZE; i > 0; i--)
867            {
868                word12 = *((ULong*)prev); /* read b4 b3 b2 b1 */
869                word22 = *((ULong*)(prev + lx));
870
871                word1 = *((ULong*)(prev += 4)); /* read b8 b7 b6 b5 */
872                word2 = *((ULong*)(prev + lx));
873                word12 >>= 8; /* 0 b4 b3 b2 */
874                word22 >>= 8;
875                word12 = word12 | (word1 << 24); /* b5 b4 b3 b2 */
876                word22 = word22 | (word2 << 24);
877                word3 = word12 | word22;
878                word12 &= mask;
879                word22 &= mask;
880                word3 &= (~mask); /* 0x1010101, check last bit */
881                word12 >>= 1;
882                word12 = word12 + (word22 >> 1);
883                word12 += word3;
884                *((ULong*)(rec += 12)) = word12;
885
886                word12 = *((ULong*)(prev += 4)); /* read b12 b11 b10 b9 */
887                word22 = *((ULong*)(prev + lx));
888                word1 >>= 8; /* 0 b8 b7 b6 */
889                word2 >>= 8;
890                word1 = word1 | (word12 << 24); /* b9 b8 b7 b6 */
891                word2 = word2 | (word22 << 24);
892                word3 = word1 | word2;
893                word1 &= mask;
894                word2 &= mask;
895                word3 &= (~mask); /* 0x1010101, check last bit */
896                word1 >>= 1;
897                word1 = word1 + (word2 >> 1);
898                word1 += word3;
899                *((ULong*)(rec += 4)) = word1;
900                prev += offset;
901            }
902            return 1;
903        }
904        else /* rnd1 = 0 */
905        {
906            for (i = B_SIZE; i > 0; i--)
907            {
908                word12 = *((ULong*)prev); /* read b4 b3 b2 b1 */
909                word22 = *((ULong*)(prev + lx));
910
911                word1 = *((ULong*)(prev += 4)); /* read b8 b7 b6 b5 */
912                word2 = *((ULong*)(prev + lx));
913                word12 >>= 8; /* 0 b4 b3 b2 */
914                word22 >>= 8;
915                word12 = word12 | (word1 << 24); /* b5 b4 b3 b2 */
916                word22 = word22 | (word2 << 24);
917                word3 = word12 & word22;
918                word12 &= mask;
919                word22 &= mask;
920                word3 &= (~mask); /* 0x1010101, check last bit */
921                word12 >>= 1;
922                word12 = word12 + (word22 >> 1);
923                word12 += word3;
924                *((ULong*)(rec += 12)) = word12;
925
926                word12 = *((ULong*)(prev += 4)); /* read b12 b11 b10 b9 */
927                word22 = *((ULong*)(prev + lx));
928                word1 >>= 8; /* 0 b8 b7 b6 */
929                word2 >>= 8;
930                word1 = word1 | (word12 << 24); /* b9 b8 b7 b6 */
931                word2 = word2 | (word22 << 24);
932                word3 = word1 & word2;
933                word1 &= mask;
934                word2 &= mask;
935                word3 &= (~mask); /* 0x1010101, check last bit */
936                word1 >>= 1;
937                word1 = word1 + (word2 >> 1);
938                word1 += word3;
939                *((ULong*)(rec += 4)) = word1;
940                prev += offset;
941            }
942            return 1;
943        }
944    }
945    else if (tmp == 2)
946    {
947        prev -= 2; /* word-aligned */
948        if (rnd1 == 1)
949        {
950            for (i = B_SIZE; i > 0; i--)
951            {
952                word12 = *((ULong*)prev); /* read b4 b3 b2 b1 */
953                word22 = *((ULong*)(prev + lx));
954
955                word1 = *((ULong*)(prev += 4)); /* read b8 b7 b6 b5 */
956                word2 = *((ULong*)(prev + lx));
957                word12 >>= 16; /* 0 0 b4 b3 */
958                word22 >>= 16;
959                word12 = word12 | (word1 << 16); /* b6 b5 b4 b3 */
960                word22 = word22 | (word2 << 16);
961                word3 = word12 | word22;
962                word12 &= mask;
963                word22 &= mask;
964                word3 &= (~mask); /* 0x1010101, check last bit */
965                word12 >>= 1;
966                word12 = word12 + (word22 >> 1);
967                word12 += word3;
968                *((ULong*)(rec += 12)) = word12;
969
970                word12 = *((ULong*)(prev += 4)); /* read b12 b11 b10 b9 */
971                word22 = *((ULong*)(prev + lx));
972                word1 >>= 16; /* 0 0 b8 b7 */
973                word2 >>= 16;
974                word1 = word1 | (word12 << 16); /* b10 b9 b8 b7 */
975                word2 = word2 | (word22 << 16);
976                word3 = word1 | word2;
977                word1 &= mask;
978                word2 &= mask;
979                word3 &= (~mask); /* 0x1010101, check last bit */
980                word1 >>= 1;
981                word1 = word1 + (word2 >> 1);
982                word1 += word3;
983                *((ULong*)(rec += 4)) = word1;
984                prev += offset;
985            }
986            return 1;
987        }
988        else /* rnd1 = 0 */
989        {
990            for (i = B_SIZE; i > 0; i--)
991            {
992                word12 = *((ULong*)prev); /* read b4 b3 b2 b1 */
993                word22 = *((ULong*)(prev + lx));
994
995                word1 = *((ULong*)(prev += 4)); /* read b8 b7 b6 b5 */
996                word2 = *((ULong*)(prev + lx));
997                word12 >>= 16; /* 0 0 b4 b3 */
998                word22 >>= 16;
999                word12 = word12 | (word1 << 16); /* b6 b5 b4 b3 */
1000                word22 = word22 | (word2 << 16);
1001                word3 = word12 & word22;
1002                word12 &= mask;
1003                word22 &= mask;
1004                word3 &= (~mask); /* 0x1010101, check last bit */
1005                word12 >>= 1;
1006                word12 = word12 + (word22 >> 1);
1007                word12 += word3;
1008                *((ULong*)(rec += 12)) = word12;
1009
1010                word12 = *((ULong*)(prev += 4)); /* read b12 b11 b10 b9 */
1011                word22 = *((ULong*)(prev + lx));
1012                word1 >>= 16; /* 0 0 b8 b7 */
1013                word2 >>= 16;
1014                word1 = word1 | (word12 << 16); /* b10 b9 b8 b7 */
1015                word2 = word2 | (word22 << 16);
1016                word3 = word1 & word2;
1017                word1 &= mask;
1018                word2 &= mask;
1019                word3 &= (~mask); /* 0x1010101, check last bit */
1020                word1 >>= 1;
1021                word1 = word1 + (word2 >> 1);
1022                word1 += word3;
1023                *((ULong*)(rec += 4)) = word1;
1024                prev += offset;
1025            }
1026
1027            return 1;
1028        }
1029    }
1030    else /* tmp == 3 */
1031    {
1032        prev -= 3; /* word-aligned */
1033        if (rnd1 == 1)
1034        {
1035            for (i = B_SIZE; i > 0; i--)
1036            {
1037                word12 = *((ULong*)prev); /* read b4 b3 b2 b1 */
1038                word22 = *((ULong*)(prev + lx));
1039
1040                word1 = *((ULong*)(prev += 4)); /* read b8 b7 b6 b5 */
1041                word2 = *((ULong*)(prev + lx));
1042                word12 >>= 24; /* 0 0 0 b4 */
1043                word22 >>= 24;
1044                word12 = word12 | (word1 << 8); /* b7 b6 b5 b4 */
1045                word22 = word22 | (word2 << 8);
1046                word3 = word12 | word22;
1047                word12 &= mask;
1048                word22 &= mask;
1049                word3 &= (~mask); /* 0x1010101, check last bit */
1050                word12 >>= 1;
1051                word12 = word12 + (word22 >> 1);
1052                word12 += word3;
1053                *((ULong*)(rec += 12)) = word12;
1054
1055                word12 = *((ULong*)(prev += 4)); /* read b12 b11 b10 b9 */
1056                word22 = *((ULong*)(prev + lx));
1057                word1 >>= 24; /* 0 0 0 b8 */
1058                word2 >>= 24;
1059                word1 = word1 | (word12 << 8); /* b11 b10 b9 b8 */
1060                word2 = word2 | (word22 << 8);
1061                word3 = word1 | word2;
1062                word1 &= mask;
1063                word2 &= mask;
1064                word3 &= (~mask); /* 0x1010101, check last bit */
1065                word1 >>= 1;
1066                word1 = word1 + (word2 >> 1);
1067                word1 += word3;
1068                *((ULong*)(rec += 4)) = word1;
1069                prev += offset;
1070            }
1071            return 1;
1072        }
1073        else /* rnd1 = 0 */
1074        {
1075            for (i = B_SIZE; i > 0; i--)
1076            {
1077                word12 = *((ULong*)prev); /* read b4 b3 b2 b1 */
1078                word22 = *((ULong*)(prev + lx));
1079
1080                word1 = *((ULong*)(prev += 4)); /* read b8 b7 b6 b5 */
1081                word2 = *((ULong*)(prev + lx));
1082                word12 >>= 24; /* 0 0 0 b4 */
1083                word22 >>= 24;
1084                word12 = word12 | (word1 << 8); /* b7 b6 b5 b4 */
1085                word22 = word22 | (word2 << 8);
1086                word3 = word12 & word22;
1087                word12 &= mask;
1088                word22 &= mask;
1089                word3 &= (~mask); /* 0x1010101, check last bit */
1090                word12 >>= 1;
1091                word12 = word12 + (word22 >> 1);
1092                word12 += word3;
1093                *((ULong*)(rec += 12)) = word12;
1094
1095                word12 = *((ULong*)(prev += 4)); /* read b12 b11 b10 b9 */
1096                word22 = *((ULong*)(prev + lx));
1097                word1 >>= 24; /* 0 0 0 b8 */
1098                word2 >>= 24;
1099                word1 = word1 | (word12 << 8); /* b11 b10 b9 b8 */
1100                word2 = word2 | (word22 << 8);
1101                word3 = word1 & word2;
1102                word1 &= mask;
1103                word2 &= mask;
1104                word3 &= (~mask); /* 0x1010101, check last bit */
1105                word1 >>= 1;
1106                word1 = word1 + (word2 >> 1);
1107                word1 += word3;
1108                *((ULong*)(rec += 4)) = word1;
1109                prev += offset;
1110            }
1111            return 1;
1112        } /* rnd */
1113    } /* tmp */
1114}
1115
1116/**********************************************************************************/
1117Int GetPredAdvBy1x1(
1118    UChar *prev,        /* i */
1119    UChar *rec,     /* i */
1120    Int lx,     /* i */
1121    Int rnd1 /* i */
1122)
1123{
1124    Int i;      /* loop variable */
1125    Int offset;
1126    ULong  x1, x2, x1m, x2m, y1, y2, y1m, y2m; /* new way */
1127    Int tmp;
1128    Int rnd2;
1129    ULong mask;
1130
1131    /* initialize offset to adjust pixel counter */
1132    /*    the next row; full-pel resolution      */
1133    offset = lx - B_SIZE; /* offset for prev */
1134
1135    rnd2 = rnd1 + 1;
1136    rnd2 |= (rnd2 << 8);
1137    rnd2 |= (rnd2 << 16);
1138
1139    mask = 0x3F;
1140    mask |= (mask << 8);
1141    mask |= (mask << 16); /* 0x3f3f3f3f */
1142
1143    tmp = (ULong)prev & 3;
1144
1145    rec -= 4; /* preset */
1146
1147    if (tmp == 0) /* word-aligned */
1148    {
1149        for (i = B_SIZE; i > 0; i--)
1150        {
1151            x1 = *((ULong*)prev); /* load a3 a2 a1 a0 */
1152            x2 = *((ULong*)(prev + lx)); /* load b3 b2 b1 b0, another line */
1153            y1 = *((ULong*)(prev += 4)); /* a7 a6 a5 a4 */
1154            y2 = *((ULong*)(prev + lx)); /* b7 b6 b5 b4 */
1155
1156            x1m = (x1 >> 2) & mask; /* zero out last 2 bits */
1157            x2m = (x2 >> 2) & mask;
1158            x1 = x1 ^(x1m << 2);
1159            x2 = x2 ^(x2m << 2);
1160            x1m += x2m;
1161            x1 += x2;
1162
1163            /* x2m, x2 free */
1164            y1m = (y1 >> 2) & mask; /* zero out last 2 bits */
1165            y2m = (y2 >> 2) & mask;
1166            y1 = y1 ^(y1m << 2);
1167            y2 = y2 ^(y2m << 2);
1168            y1m += y2m;
1169            y1 += y2;
1170
1171            /* y2m, y2 free */
1172            /* x2m, x2 free */
1173            x2 = *((ULong*)(prev += 4)); /* a11 a10 a9 a8 */
1174            y2 = *((ULong*)(prev + lx)); /* b11 b10 b9 b8 */
1175            x2m = (x2 >> 2) & mask;
1176            y2m = (y2 >> 2) & mask;
1177            x2 = x2 ^(x2m << 2);
1178            y2 = y2 ^(y2m << 2);
1179            x2m += y2m;
1180            x2 += y2;
1181            /* y2m, y2 free */
1182
1183            /* now operate on x1m, x1, y1m, y1, x2m, x2 */
1184            /* x1m = a3+b3, a2+b2, a1+b1, a0+b0 */
1185            /* y1m = a7+b7, a6+b6, a5+b5, a4+b4 */
1186            /* x2m = a11+b11, a10+b10, a9+b9, a8+b8 */
1187            /* x1, y1, x2 */
1188
1189            y2m = x1m >> 8;
1190            y2 = x1 >> 8;
1191            y2m |= (y1m << 24);  /* a4+b4, a3+b3, a2+b2, a1+b1 */
1192            y2 |= (y1 << 24);
1193            x1m += y2m;  /* a3+b3+a4+b4, ....., a0+b0+a1+b1 */
1194            x1 += y2;
1195            x1 += rnd2;
1196            x1 &= (mask << 2);
1197            x1m += (x1 >> 2);
1198            *((ULong*)(rec += 4)) = x1m; /* save x1m */
1199
1200            y2m = y1m >> 8;
1201            y2 = y1 >> 8;
1202            y2m |= (x2m << 24); /* a8+b8, a7+b7, a6+b6, a5+b5 */
1203            y2 |= (x2 << 24);
1204            y1m += y2m;  /* a7+b7+a8+b8, ....., a4+b4+a5+b5 */
1205            y1 += y2;
1206            y1 += rnd2;
1207            y1 &= (mask << 2);
1208            y1m += (y1 >> 2);
1209            *((ULong*)(rec += 4)) = y1m; /* save y1m */
1210
1211            rec += 8;
1212            prev += offset;
1213        }
1214
1215        return 1;
1216    }
1217    else if (tmp == 1)
1218    {
1219        prev--; /* to word-aligned */
1220        for (i = B_SIZE; i > 0; i--)
1221        {
1222            x1 = *((ULong*)prev); /* load a3 a2 a1 a0 */
1223            x2 = *((ULong*)(prev + lx)); /* load b3 b2 b1 b0, another line */
1224            y1 = *((ULong*)(prev += 4)); /* a7 a6 a5 a4 */
1225            y2 = *((ULong*)(prev + lx)); /* b7 b6 b5 b4 */
1226
1227            x1m = (x1 >> 2) & mask; /* zero out last 2 bits */
1228            x2m = (x2 >> 2) & mask;
1229            x1 = x1 ^(x1m << 2);
1230            x2 = x2 ^(x2m << 2);
1231            x1m += x2m;
1232            x1 += x2;
1233
1234            /* x2m, x2 free */
1235            y1m = (y1 >> 2) & mask; /* zero out last 2 bits */
1236            y2m = (y2 >> 2) & mask;
1237            y1 = y1 ^(y1m << 2);
1238            y2 = y2 ^(y2m << 2);
1239            y1m += y2m;
1240            y1 += y2;
1241
1242            /* y2m, y2 free */
1243            /* x2m, x2 free */
1244            x2 = *((ULong*)(prev += 4)); /* a11 a10 a9 a8 */
1245            y2 = *((ULong*)(prev + lx)); /* b11 b10 b9 b8 */
1246            x2m = (x2 >> 2) & mask;
1247            y2m = (y2 >> 2) & mask;
1248            x2 = x2 ^(x2m << 2);
1249            y2 = y2 ^(y2m << 2);
1250            x2m += y2m;
1251            x2 += y2;
1252            /* y2m, y2 free */
1253
1254            /* now operate on x1m, x1, y1m, y1, x2m, x2 */
1255            /* x1m = a3+b3, a2+b2, a1+b1, a0+b0 */
1256            /* y1m = a7+b7, a6+b6, a5+b5, a4+b4 */
1257            /* x2m = a11+b11, a10+b10, a9+b9, a8+b8 */
1258            /* x1, y1, x2 */
1259
1260            x1m >>= 8 ;
1261            x1 >>= 8;
1262            x1m |= (y1m << 24);  /* a4+b4, a3+b3, a2+b2, a1+b1 */
1263            x1 |= (y1 << 24);
1264            y2m = (y1m << 16);
1265            y2 = (y1 << 16);
1266            y2m |= (x1m >> 8); /* a5+b5, a4+b4, a3+b3, a2+b2 */
1267            y2 |= (x1 >> 8);
1268            x1 += rnd2;
1269            x1m += y2m;  /* a4+b4+a5+b5, ....., a1+b1+a2+b2 */
1270            x1 += y2;
1271            x1 &= (mask << 2);
1272            x1m += (x1 >> 2);
1273            *((ULong*)(rec += 4)) = x1m; /* save x1m */
1274
1275            y1m >>= 8;
1276            y1 >>= 8;
1277            y1m |= (x2m << 24); /* a8+b8, a7+b7, a6+b6, a5+b5 */
1278            y1 |= (x2 << 24);
1279            y2m = (x2m << 16);
1280            y2 = (x2 << 16);
1281            y2m |= (y1m >> 8); /*  a9+b9, a8+b8, a7+b7, a6+b6,*/
1282            y2 |= (y1 >> 8);
1283            y1 += rnd2;
1284            y1m += y2m;  /* a8+b8+a9+b9, ....., a5+b5+a6+b6 */
1285            y1 += y2;
1286            y1 &= (mask << 2);
1287            y1m += (y1 >> 2);
1288            *((ULong*)(rec += 4)) = y1m; /* save y1m */
1289
1290            rec += 8;
1291            prev += offset;
1292        }
1293        return 1;
1294    }
1295    else if (tmp == 2)
1296    {
1297        prev -= 2; /* to word-aligned */
1298        for (i = B_SIZE; i > 0; i--)
1299        {
1300            x1 = *((ULong*)prev); /* load a3 a2 a1 a0 */
1301            x2 = *((ULong*)(prev + lx)); /* load b3 b2 b1 b0, another line */
1302            y1 = *((ULong*)(prev += 4)); /* a7 a6 a5 a4 */
1303            y2 = *((ULong*)(prev + lx)); /* b7 b6 b5 b4 */
1304
1305            x1m = (x1 >> 2) & mask; /* zero out last 2 bits */
1306            x2m = (x2 >> 2) & mask;
1307            x1 = x1 ^(x1m << 2);
1308            x2 = x2 ^(x2m << 2);
1309            x1m += x2m;
1310            x1 += x2;
1311
1312            /* x2m, x2 free */
1313            y1m = (y1 >> 2) & mask; /* zero out last 2 bits */
1314            y2m = (y2 >> 2) & mask;
1315            y1 = y1 ^(y1m << 2);
1316            y2 = y2 ^(y2m << 2);
1317            y1m += y2m;
1318            y1 += y2;
1319
1320            /* y2m, y2 free */
1321            /* x2m, x2 free */
1322            x2 = *((ULong*)(prev += 4)); /* a11 a10 a9 a8 */
1323            y2 = *((ULong*)(prev + lx)); /* b11 b10 b9 b8 */
1324            x2m = (x2 >> 2) & mask;
1325            y2m = (y2 >> 2) & mask;
1326            x2 = x2 ^(x2m << 2);
1327            y2 = y2 ^(y2m << 2);
1328            x2m += y2m;
1329            x2 += y2;
1330            /* y2m, y2 free */
1331
1332            /* now operate on x1m, x1, y1m, y1, x2m, x2 */
1333            /* x1m = a3+b3, a2+b2, a1+b1, a0+b0 */
1334            /* y1m = a7+b7, a6+b6, a5+b5, a4+b4 */
1335            /* x2m = a11+b11, a10+b10, a9+b9, a8+b8 */
1336            /* x1, y1, x2 */
1337
1338            x1m >>= 16 ;
1339            x1 >>= 16;
1340            x1m |= (y1m << 16);  /* a5+b5, a4+b4, a3+b3, a2+b2 */
1341            x1 |= (y1 << 16);
1342            y2m = (y1m << 8);
1343            y2 = (y1 << 8);
1344            y2m |= (x1m >> 8); /* a6+b6, a5+b5, a4+b4, a3+b3 */
1345            y2 |= (x1 >> 8);
1346            x1 += rnd2;
1347            x1m += y2m;  /* a5+b5+a6+b6, ....., a2+b2+a3+b3 */
1348            x1 += y2;
1349            x1 &= (mask << 2);
1350            x1m += (x1 >> 2);
1351            *((ULong*)(rec += 4)) = x1m; /* save x1m */
1352
1353            y1m >>= 16;
1354            y1 >>= 16;
1355            y1m |= (x2m << 16); /* a9+b9, a8+b8, a7+b7, a6+b6 */
1356            y1 |= (x2 << 16);
1357            y2m = (x2m << 8);
1358            y2 = (x2 << 8);
1359            y2m |= (y1m >> 8); /*  a10+b10, a9+b9, a8+b8, a7+b7,*/
1360            y2 |= (y1 >> 8);
1361            y1 += rnd2;
1362            y1m += y2m;  /* a9+b9+a10+b10, ....., a6+b6+a7+b7 */
1363            y1 += y2;
1364            y1 &= (mask << 2);
1365            y1m += (y1 >> 2);
1366            *((ULong*)(rec += 4)) = y1m; /* save y1m */
1367
1368            rec += 8;
1369            prev += offset;
1370        }
1371        return 1;
1372    }
1373    else /* tmp == 3 */
1374    {
1375        prev -= 3; /* to word-aligned */
1376        for (i = B_SIZE; i > 0; i--)
1377        {
1378            x1 = *((ULong*)prev); /* load a3 a2 a1 a0 */
1379            x2 = *((ULong*)(prev + lx)); /* load b3 b2 b1 b0, another line */
1380            y1 = *((ULong*)(prev += 4)); /* a7 a6 a5 a4 */
1381            y2 = *((ULong*)(prev + lx)); /* b7 b6 b5 b4 */
1382
1383            x1m = (x1 >> 2) & mask; /* zero out last 2 bits */
1384            x2m = (x2 >> 2) & mask;
1385            x1 = x1 ^(x1m << 2);
1386            x2 = x2 ^(x2m << 2);
1387            x1m += x2m;
1388            x1 += x2;
1389
1390            /* x2m, x2 free */
1391            y1m = (y1 >> 2) & mask; /* zero out last 2 bits */
1392            y2m = (y2 >> 2) & mask;
1393            y1 = y1 ^(y1m << 2);
1394            y2 = y2 ^(y2m << 2);
1395            y1m += y2m;
1396            y1 += y2;
1397
1398            /* y2m, y2 free */
1399            /* x2m, x2 free */
1400            x2 = *((ULong*)(prev += 4)); /* a11 a10 a9 a8 */
1401            y2 = *((ULong*)(prev + lx)); /* b11 b10 b9 b8 */
1402            x2m = (x2 >> 2) & mask;
1403            y2m = (y2 >> 2) & mask;
1404            x2 = x2 ^(x2m << 2);
1405            y2 = y2 ^(y2m << 2);
1406            x2m += y2m;
1407            x2 += y2;
1408            /* y2m, y2 free */
1409
1410            /* now operate on x1m, x1, y1m, y1, x2m, x2 */
1411            /* x1m = a3+b3, a2+b2, a1+b1, a0+b0 */
1412            /* y1m = a7+b7, a6+b6, a5+b5, a4+b4 */
1413            /* x2m = a11+b11, a10+b10, a9+b9, a8+b8 */
1414            /* x1, y1, x2 */
1415
1416            x1m >>= 24 ;
1417            x1 >>= 24;
1418            x1m |= (y1m << 8);  /* a6+b6, a5+b5, a4+b4, a3+b3 */
1419            x1 |= (y1 << 8);
1420
1421            x1m += y1m;  /* a6+b6+a7+b7, ....., a3+b3+a4+b4 */
1422            x1 += y1;
1423            x1 += rnd2;
1424            x1 &= (mask << 2);
1425            x1m += (x1 >> 2);
1426            *((ULong*)(rec += 4)) = x1m; /* save x1m */
1427
1428            y1m >>= 24;
1429            y1 >>= 24;
1430            y1m |= (x2m << 8); /* a10+b10, a9+b9, a8+b8, a7+b7 */
1431            y1 |= (x2 << 8);
1432            y1m += x2m;  /* a10+b10+a11+b11, ....., a7+b7+a8+b8 */
1433            y1 += x2;
1434            y1 += rnd2;
1435            y1 &= (mask << 2);
1436            y1m += (y1 >> 2);
1437            *((ULong*)(rec += 4)) = y1m; /* save y1m */
1438
1439            rec += 8;
1440            prev += offset;
1441        }
1442        return 1;
1443    }
1444}
1445
1446
1447/*=============================================================================
1448    Function:   EncGetPredOutside
1449    Date:       04/17/2001
1450    Purpose:    - modified from GetPredOutside in the decoder.
1451    Modified:    09/24/05
1452                use the existing non-initialized padded region
1453=============================================================================*/
1454// not really needed since padding is included
/* PAD_CORNER: replicate the single byte at *src into an 8-byte-wide, 8-row
 * area starting at dst (rows lx bytes apart).
 * Uses the caller's locals src, dst, lx and temp; src is left untouched and
 * dst ends up on the last row written.  Expands to a brace block, so no
 * trailing semicolon is required at the use site. */
#define PAD_CORNER  { Int pad_rows_left; \
                      temp = *src; \
                      temp |= (temp << 8); \
                      temp |= (temp << 16); \
                      *((ULong*)dst) = temp; \
                      *((ULong*)(dst + 4)) = temp; \
                      for (pad_rows_left = 7; pad_rows_left > 0; pad_rows_left--) \
                      { \
                          dst += lx; \
                          *((ULong*)dst) = temp; \
                          *((ULong*)(dst + 4)) = temp; \
                      } }
1474
/* PAD_ROW: copy the two words found at src and src+4 into eight consecutive
 * rows starting at dst (rows lx bytes apart).
 * Uses the caller's locals src, dst, lx, temp and temp2; both source words
 * are read before the first store, src is left untouched and dst ends up on
 * the last row written. */
#define PAD_ROW     { Int pad_rows_left; \
                      temp = *((ULong*)src); \
                      temp2 = *((ULong*)(src + 4)); \
                      *((ULong*)dst) = temp; \
                      *((ULong*)(dst + 4)) = temp2; \
                      for (pad_rows_left = 7; pad_rows_left > 0; pad_rows_left--) \
                      { \
                          dst += lx; \
                          *((ULong*)dst) = temp; \
                          *((ULong*)(dst + 4)) = temp2; \
                      } }
1493
/* PAD_COL: for eight consecutive rows, take the byte at src and spread it
 * over the 8 bytes at dst; src and dst both step by lx between rows.
 * Uses the caller's locals src, dst, lx and temp.  On exit src and dst both
 * point at the last (eighth) row handled. */
#define PAD_COL     { Int pad_rows_left; \
                      temp = *src; \
                      temp |= (temp << 8); \
                      temp |= (temp << 16); \
                      *((ULong*)dst) = temp; \
                      *((ULong*)(dst + 4)) = temp; \
                      for (pad_rows_left = 7; pad_rows_left > 0; pad_rows_left--) \
                      { \
                          src += lx; \
                          dst += lx; \
                          temp = *src; \
                          temp |= (temp << 8); \
                          temp |= (temp << 16); \
                          *((ULong*)dst) = temp; \
                          *((ULong*)(dst + 4)) = temp; \
                      } }
1518
1519
1520Int EncGetPredOutside(Int xpos, Int ypos, UChar *c_prev, UChar *rec,
1521                      Int width, Int height, Int rnd1)
1522{
1523    Int lx;
1524    UChar *src, *dst;
1525    ULong temp, temp2;
1526    Int xoffset;
1527
1528    lx = width + 16; /* only works for chroma */
1529
1530    if (xpos < 0)
1531    {
1532        if (ypos < 0) /* pad top-left */
1533        {
1534            /* pad corner */
1535            src = c_prev;
1536            dst = c_prev - (lx << 3) - 8;
1537            PAD_CORNER
1538
1539            /* pad top */
1540            dst = c_prev - (lx << 3);
1541            PAD_ROW
1542
1543            /* pad left */
1544            dst = c_prev - 8;
1545            PAD_COL
1546
1547            GetPredAdvBTable[ypos&1][xpos&1](c_prev + (xpos >> 1) + ((ypos >> 1)*lx),
1548                                             rec, lx, rnd1);
1549
1550            return 1;
1551        }
1552        else if ((ypos >> 1) < (height - 8)) /* pad left of frame */
1553        {
1554            /* pad left */
1555            src = c_prev + (ypos >> 1) * lx;
1556            dst = src - 8;
1557            PAD_COL
1558            /* pad extra row */
1559            temp = *(src += lx);
1560            temp |= (temp << 8);
1561            temp |= (temp << 16);
1562            *((ULong*)(dst += lx)) = temp;
1563            *((ULong*)(dst + 4)) = temp;
1564
1565            GetPredAdvBTable[ypos&1][xpos&1](c_prev + (xpos >> 1) + ((ypos >> 1)*lx),
1566                                             rec, lx, rnd1);
1567
1568            return 1;
1569        }
1570        else /* pad bottom-left */
1571        {
1572            /* pad corner */
1573            src = c_prev + (height - 1) * lx;
1574            dst = src + lx - 8;
1575            PAD_CORNER
1576
1577            /* pad bottom */
1578            dst = src + lx;
1579            PAD_ROW
1580
1581            /* pad left */
1582            src -= (lx << 3);
1583            src += lx;
1584            dst = src - 8;
1585            PAD_COL
1586
1587            GetPredAdvBTable[ypos&1][xpos&1](c_prev + (xpos >> 1) + ((ypos >> 1)*lx),
1588                                             rec, lx, rnd1);
1589
1590            return 1;
1591        }
1592    }
1593    else if ((xpos >> 1) < (width - 8))
1594    {
1595        if (ypos < 0) /* pad top of frame */
1596        {
1597            xoffset = (xpos >> 1) & 0x3;
1598            src = c_prev + (xpos >> 1) - xoffset;
1599            dst = src - (lx << 3);
1600            PAD_ROW
1601            if (xoffset || (xpos&1))
1602            {
1603                temp = *((ULong*)(src + 8));
1604                dst = src - (lx << 3) + 8;
1605                *((ULong*)dst) = temp;
1606                *((ULong*)(dst += lx)) = temp;
1607                *((ULong*)(dst += lx)) = temp;
1608                *((ULong*)(dst += lx)) = temp;
1609                *((ULong*)(dst += lx)) = temp;
1610                *((ULong*)(dst += lx)) = temp;
1611                *((ULong*)(dst += lx)) = temp;
1612                *((ULong*)(dst += lx)) = temp;
1613            }
1614
1615            GetPredAdvBTable[ypos&1][xpos&1](c_prev + (xpos >> 1) + ((ypos >> 1)*lx),
1616                                             rec, lx, rnd1);
1617
1618            return 1;
1619        }
1620        else /* pad bottom of frame */
1621        {
1622            xoffset = (xpos >> 1) & 0x3;
1623            src = c_prev + (xpos >> 1) - xoffset + (height - 1) * lx;
1624            dst = src + lx;
1625            PAD_ROW
1626            if (xoffset || (xpos&1))
1627            {
1628                temp = *((ULong*)(src + 8));
1629                dst = src + lx + 8;
1630                *((ULong*)dst) = temp;
1631                *((ULong*)(dst += lx)) = temp;
1632                *((ULong*)(dst += lx)) = temp;
1633                *((ULong*)(dst += lx)) = temp;
1634                *((ULong*)(dst += lx)) = temp;
1635                *((ULong*)(dst += lx)) = temp;
1636                *((ULong*)(dst += lx)) = temp;
1637                *((ULong*)(dst += lx)) = temp;
1638            }
1639
1640            GetPredAdvBTable[ypos&1][xpos&1](c_prev + (xpos >> 1) + ((ypos >> 1)*lx),
1641                                             rec, lx, rnd1);
1642
1643            return 1;
1644        }
1645    }
1646    else
1647    {
1648        if (ypos < 0) /* pad top-right */
1649        {
1650            /* pad corner */
1651            src = c_prev + width - 1;
1652            dst = src - (lx << 3) + 1;
1653            PAD_CORNER
1654
1655            /* pad top */
1656            src -= 7;
1657            dst = src - (lx << 3);
1658            PAD_ROW
1659
1660            /* pad left */
1661            src += 7;
1662            dst = src + 1;
1663            PAD_COL
1664
1665            GetPredAdvBTable[ypos&1][xpos&1](c_prev + (xpos >> 1) + ((ypos >> 1)*lx),
1666                                             rec, lx, rnd1);
1667
1668            return 1;
1669        }
1670        else if ((ypos >> 1) < (height - B_SIZE)) /* pad right of frame */
1671        {
1672            /* pad left */
1673            src = c_prev + (ypos >> 1) * lx + width - 1;
1674            dst = src + 1;
1675            PAD_COL
1676            /* pad extra row */
1677            temp = *(src += lx);
1678            temp |= (temp << 8);
1679            temp |= (temp << 16);
1680            *((ULong*)(dst += lx)) = temp;
1681            *((ULong*)(dst + 4)) = temp;
1682
1683            GetPredAdvBTable[ypos&1][xpos&1](c_prev + (xpos >> 1) + ((ypos >> 1)*lx),
1684                                             rec, lx, rnd1);
1685
1686            return 1;
1687        }
1688        else /* pad bottom-right */
1689        {
1690            /* pad left */
1691            src = c_prev + (height - 8) * lx + width - 1;
1692            dst = src + 1;
1693            PAD_COL
1694
1695            /* pad corner */
1696            dst = src + lx + 1;
1697            PAD_CORNER
1698
1699            /* pad bottom */
1700            src -= 7;
1701            dst = src + lx;
1702            PAD_ROW
1703
1704            GetPredAdvBTable[ypos&1][xpos&1](c_prev + (xpos >> 1) + ((ypos >> 1)*lx),
1705                                             rec, lx, rnd1);
1706
1707            return 1;
1708        }
1709    }
1710}
1711
1712/* ====================================================================== /
1713    Function : Copy_MB_from_Vop()
1714    Date     : 04/17/2001
1715 ====================================================================== */
1716
1717void Copy_MB_from_Vop(UChar *comp, Int yChan[][NCOEFF_BLOCK], Int pitch)
1718{
1719    Int row, col, i;
1720    Int *src1, *src2;
1721    Int offset = pitch - MB_SIZE;
1722    ULong temp;
1723
1724    for (i = 0; i < 4; i += 2)
1725    {
1726        src1 = yChan[i];
1727        src2 = yChan[i+1];
1728
1729        row = B_SIZE;
1730        while (row--)
1731        {
1732            col = B_SIZE;
1733            while (col)
1734            {
1735                temp = *((ULong*)comp);
1736                *src1++ = (Int)(temp & 0xFF);
1737                *src1++ = (Int)((temp >> 8) & 0xFF);
1738                *src1++ = (Int)((temp >> 16) & 0xFF);
1739                *src1++ = (Int)((temp >> 24) & 0xFF);
1740                comp += 4;
1741                col -= 4;
1742            }
1743            col = B_SIZE;
1744            while (col)
1745            {
1746                temp = *((ULong*)comp);
1747                *src2++ = (Int)(temp & 0xFF);
1748                *src2++ = (Int)((temp >> 8) & 0xFF);
1749                *src2++ = (Int)((temp >> 16) & 0xFF);
1750                *src2++ = (Int)((temp >> 24) & 0xFF);
1751                comp += 4;
1752                col -= 4;
1753            }
1754            comp += offset;
1755        }
1756    }
1757    return ;
1758}
1759
1760/* ====================================================================== /
1761    Function : Copy_B_from_Vop()
1762    Date     : 04/17/2001
1763/ ====================================================================== */
1764
1765void Copy_B_from_Vop(UChar *comp, Int cChan[], Int pitch)
1766{
1767    Int row, col;
1768    Int offset = pitch - B_SIZE;
1769    ULong temp;
1770
1771    row = B_SIZE;
1772    while (row--)
1773    {
1774        col = B_SIZE;
1775        while (col)
1776        {
1777            temp = *((ULong*)comp);
1778            *cChan++ = (Int)(temp & 0xFF);
1779            *cChan++ = (Int)((temp >> 8) & 0xFF);
1780            *cChan++ = (Int)((temp >> 16) & 0xFF);
1781            *cChan++ = (Int)((temp >> 24) & 0xFF);
1782            comp += 4;
1783            col -= 4;
1784        }
1785        comp += offset;
1786    }
1787}
1788
1789/* ====================================================================== /
1790    Function : Copy_MB_into_Vop()
1791    Date     : 04/17/2001
1792    History  : From decoder
1793/ ====================================================================== */
1794
1795void Copy_MB_into_Vop(UChar *comp, Int yChan[][NCOEFF_BLOCK], Int pitch)
1796{
1797    Int row, col, i;
1798    Int *src1, *src2;
1799    Int offset = pitch - MB_SIZE;
1800    UChar mask = 0xFF;
1801    Int tmp;
1802    ULong temp;
1803
1804    for (i = 0; i < 4; i += 2)
1805    {
1806        src1 = yChan[i];
1807        src2 = yChan[i+1];
1808
1809        row = B_SIZE;
1810        while (row--)
1811        {
1812            col = B_SIZE;
1813            while (col)
1814            {
1815                tmp = (*src1++);
1816                if ((UInt)tmp > mask) tmp = mask & (~(tmp >> 31));
1817                temp = tmp << 24;
1818                tmp = (*src1++);
1819                if ((UInt)tmp > mask) tmp = mask & (~(tmp >> 31));
1820                temp |= (tmp << 16);
1821                tmp = (*src1++);
1822                if ((UInt)tmp > mask) tmp = mask & (~(tmp >> 31));
1823                temp |= (tmp << 8);
1824                tmp = (*src1++);
1825                if ((UInt)tmp > mask) tmp = mask & (~(tmp >> 31));
1826                temp |= tmp;
1827                *((ULong*)comp) = temp;
1828                comp += 4;
1829                col -= 4;
1830            }
1831            col = B_SIZE;
1832            while (col)
1833            {
1834                tmp = (*src2++);
1835                if ((UInt)tmp > mask) tmp = mask & (~(tmp >> 31));
1836                temp = tmp << 24;
1837                tmp = (*src2++);
1838                if ((UInt)tmp > mask) tmp = mask & (~(tmp >> 31));
1839                temp |= (tmp << 16);
1840                tmp = (*src2++);
1841                if ((UInt)tmp > mask) tmp = mask & (~(tmp >> 31));
1842                temp |= (tmp << 8);
1843                tmp = (*src2++);
1844                if ((UInt)tmp > mask) tmp = mask & (~(tmp >> 31));
1845                temp |= tmp;
1846                *((ULong*)comp) = temp;
1847                comp += 4;
1848                col -= 4;
1849            }
1850            comp += offset;
1851        }
1852    }
1853    return ;
1854}
1855
1856
1857/* ====================================================================== /
1858    Function : Copy_B_into_Vop()
1859    Date     : 04/17/2001
1860    History  : From decoder
1861/ ====================================================================== */
1862
1863void Copy_B_into_Vop(UChar *comp, Int cChan[], Int pitch)
1864{
1865    Int row, col;
1866    Int offset = pitch - B_SIZE;
1867    Int tmp;
1868    UChar mask = 0xFF;
1869    ULong temp;
1870
1871    row = B_SIZE;
1872    while (row--)
1873    {
1874        col = B_SIZE;
1875        while (col)
1876        {
1877            tmp = (*cChan++);
1878            if ((UInt)tmp > mask) tmp = mask & (~(tmp >> 31));
1879            temp = tmp << 24;
1880            tmp = (*cChan++);
1881            if ((UInt)tmp > mask) tmp = mask & (~(tmp >> 31));
1882            temp |= (tmp << 16);
1883            tmp = (*cChan++);
1884            if ((UInt)tmp > mask) tmp = mask & (~(tmp >> 31));
1885            temp |= (tmp << 8);
1886            tmp = (*cChan++);
1887            if ((UInt)tmp > mask) tmp = mask & (~(tmp >> 31));
1888            temp |= tmp;
1889            *((ULong*)comp) = temp;
1890            comp += 4;
1891            col -= 4;
1892        }
1893        comp += offset;
1894    }
1895}
1896
1897/* ======================================================================== */
1898/*  Function : get_MB( )                                                    */
1899/*  Date     : 10/03/2000                                                   */
1900/*  Purpose  : Copy 4 Y to reference frame                                  */
1901/*  In/out   :                                                              */
1902/*  Return   :                                                              */
1903/*  Modified :                                                              */
1904/* ======================================================================== */
1905void get_MB(UChar *c_prev, UChar *c_prev_u  , UChar *c_prev_v,
1906            Short mb[6][64], Int lx, Int lx_uv)
1907
1908{
1909    Int i, j, count = 0, count1 = 0;
1910    Int k1 = lx - MB_SIZE, k2 = lx_uv - B_SIZE;
1911
1912    for (i = 0; i < B_SIZE; i++)
1913    {
1914        for (j = 0; j < B_SIZE; j++)
1915        {
1916            mb[0][count] = (Int)(*c_prev++);
1917            mb[4][count] = (Int)(*c_prev_u++);
1918            mb[5][count++] = (Int)(*c_prev_v++);
1919        }
1920
1921        for (j = 0; j < B_SIZE; j++)
1922            mb[1][count1++] = (Int)(*c_prev++);
1923
1924        c_prev += k1;
1925        c_prev_u += k2;
1926        c_prev_v += k2;
1927
1928
1929    }
1930
1931    count = count1 = 0;
1932    for (i = 0; i < B_SIZE; i++)
1933    {
1934        for (j = 0; j < B_SIZE; j++)
1935            mb[2][count++] = (Int)(*c_prev++);
1936
1937        for (j = 0; j < B_SIZE; j++)
1938            mb[3][count1++] = (Int)(*c_prev++);
1939
1940        c_prev += k1;
1941    }
1942}
1943
1944void PutSkippedBlock(UChar *rec, UChar *prev, Int lx)
1945{
1946    UChar *end;
1947    Int offset = (lx - 8) >> 2;
1948    Int *src, *dst;
1949
1950    dst = (Int*)rec;
1951    src = (Int*)prev;
1952
1953    end = prev + (lx << 3);
1954
1955    do
1956    {
1957        *dst++ = *src++;
1958        *dst++ = *src++;
1959        dst += offset;
1960        src += offset;
1961    }
1962    while ((UInt)src < (UInt)end);
1963
1964    return ;
1965}
1966