1/* ------------------------------------------------------------------
2 * Copyright (C) 1998-2009 PacketVideo
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
13 * express or implied.
14 * See the License for the specific language governing permissions
15 * and limitations under the License.
16 * -------------------------------------------------------------------
17 */
18#include "mp4def.h"
19#include "mp4enc_lib.h"
20#include "mp4lib_int.h"
21#include "m4venc_oscl.h"
22
/*
 * Offsets from the start of a padded plane to its image area.
 * Both macros expand a variable named `lx` (the row stride in bytes) that
 * must be in scope wherever they are used.
 *
 * VOP_OFFSET  = 16 rows of lx bytes plus 16 bytes.
 * CVOP_OFFSET = 4 rows of lx bytes plus 8 bytes (presumably the chroma
 *               counterpart of VOP_OFFSET -- confirm against the callers).
 */
#define VOP_OFFSET  ((lx * 16) + 16)
#define CVOP_OFFSET ((lx * 4) + 8)

/* Bias toward INTRA coding: subtracted from the SAD in ChooseMode_C(). */
#define PREF_INTRA  512
27
28/*===============================================================
29    Function:   ChooseMode
30    Date:       09/21/2000
31    Purpose:    Choosing between INTRA or INTER
32    Input/Output: Pointer to the starting point of the macroblock.
33    Note:
34===============================================================*/
35void ChooseMode_C(UChar *Mode, UChar *cur, Int lx, Int min_SAD)
36{
37    Int i, j;
38    Int MB_mean, A, tmp, Th;
39    Int offset = (lx >> 2) - 4;
40    UChar *p = cur;
41    Int *pint = (Int *) cur, temp = 0;
42    MB_mean = 0;
43    A = 0;
44    Th = (min_SAD - PREF_INTRA) >> 1;
45
46    for (j = 0; j < 8; j++)
47    {
48
49        /* Odd Rows */
50        temp += (*pint++) & 0x00FF00FF;
51        temp += (*pint++) & 0x00FF00FF;
52        temp += (*pint++) & 0x00FF00FF;
53        temp += (*pint++) & 0x00FF00FF;
54        pint += offset;
55
56        /* Even Rows */
57        temp += (*pint++ >> 8) & 0x00FF00FF;
58        temp += (*pint++ >> 8) & 0x00FF00FF;
59        temp += (*pint++ >> 8) & 0x00FF00FF;
60        temp += (*pint++ >> 8) & 0x00FF00FF;
61        pint += offset;
62
63    }
64
65    MB_mean = (((temp & 0x0000FFFF)) + ((temp & 0xFFFF0000) >> 16)) >> 7;
66
67    p = cur;
68    offset = lx - 16;
69    for (j = 0; j < 16; j++)
70    {
71        temp = (j & 1);
72        p += temp;
73        i = 8;
74        while (i--)
75        {
76            tmp = *p - MB_mean;
77            p += 2;
78            if (tmp > 0) A += tmp;
79            else    A -= tmp;
80        }
81
82        if (A >= Th)
83        {
84            *Mode = MODE_INTER;
85            return ;
86        }
87        p += (offset - temp);
88    }
89
90    if (A < Th)
91        *Mode = MODE_INTRA;
92    else
93        *Mode = MODE_INTER;
94
95    return ;
96}
97
98
99/*===============================================================
100    Function:   GetHalfPelMBRegion
101    Date:       09/17/2000
102    Purpose:    Interpolate the search region for half-pel search
103    Input/Output:   Center of the search, Half-pel memory, width
104    Note:       rounding type should be parameterized.
105                Now fixed it to zero!!!!!!
106
107===============================================================*/
108
109
110void GetHalfPelMBRegion_C(UChar *cand, UChar *hmem, Int lx)
111{
112    Int i, j;
113    UChar *p1, *p2, *p3, *p4;
114    UChar *hmem1 = hmem;
115    UChar *hmem2 = hmem1 + 33;
116    Int offset = lx - 17;
117
118    p1 = cand - lx - 1;
119    p2 = cand - lx;
120    p3 = cand - 1;
121    p4 = cand;
122
123    for (j = 0; j < 16; j++)
124    {
125        for (i = 0; i < 16; i++)
126        {
127            *hmem1++ = ((*p1++) + *p2 + *p3 + *p4 + 2) >> 2;
128            *hmem1++ = ((*p2++) + *p4 + 1) >> 1;
129            *hmem2++ = ((*p3++) + *p4 + 1) >> 1;
130            *hmem2++ = *p4++;
131        }
132        /*  last pixel */
133        *hmem1++ = ((*p1++) + (*p2++) + *p3 + *p4 + 2) >> 2;
134        *hmem2++ = ((*p3++) + (*p4++) + 1) >> 1;
135        hmem1 += 33;
136        hmem2 += 33;
137        p1 += offset;
138        p2 += offset;
139        p3 += offset;
140        p4 += offset;
141    }
142    /* last row */
143    for (i = 0; i < 16; i++)
144    {
145        *hmem1++ = ((*p1++) + *p2 + (*p3++) + *p4 + 2) >> 2;
146        *hmem1++ = ((*p2++) + (*p4++) + 1) >> 1;
147
148    }
149    *hmem1 = (*p1 + *p2 + *p3 + *p4 + 2) >> 2;
150
151    return ;
152}
153
154/*===============================================================
155   Function:    GetHalfPelBlkRegion
156   Date:        09/20/2000
157   Purpose: Interpolate the search region for half-pel search
158            in 4MV mode.
159   Input/Output:    Center of the search, Half-pel memory, width
160   Note:        rounding type should be parameterized.
161            Now fixed it to zero!!!!!!
162
163===============================================================*/
164
165
166void GetHalfPelBlkRegion(UChar *cand, UChar *hmem, Int lx)
167{
168    Int i, j;
169    UChar *p1, *p2, *p3, *p4;
170    UChar *hmem1 = hmem;
171    UChar *hmem2 = hmem1 + 17;
172    Int offset = lx - 9;
173
174    p1 = cand - lx - 1;
175    p2 = cand - lx;
176    p3 = cand - 1;
177    p4 = cand;
178
179    for (j = 0; j < 8; j++)
180    {
181        for (i = 0; i < 8; i++)
182        {
183            *hmem1++ = ((*p1++) + *p2 + *p3 + *p4 + 2) >> 2;
184            *hmem1++ = ((*p2++) + *p4 + 1) >> 1;
185            *hmem2++ = ((*p3++) + *p4 + 1) >> 1;
186            *hmem2++ = *p4++;
187        }
188        /*  last pixel */
189        *hmem1++ = ((*p1++) + (*p2++) + *p3 + *p4 + 2) >> 2;
190        *hmem2++ = ((*p3++) + (*p4++) + 1) >> 1;
191        hmem1 += 17;
192        hmem2 += 17;
193        p1 += offset;
194        p2 += offset;
195        p3 += offset;
196        p4 += offset;
197    }
198    /* last row */
199    for (i = 0; i < 8; i++)
200    {
201        *hmem1++ = ((*p1++) + *p2 + (*p3++) + *p4 + 2) >> 2;
202        *hmem1++ = ((*p2++) + (*p4++) + 1) >> 1;
203
204    }
205    *hmem1 = (*p1 + *p2 + *p3 + *p4 + 2) >> 2;
206
207    return ;
208}
209
210
211/*=====================================================================
212    Function:   PaddingEdge
213    Date:       09/16/2000
214    Purpose:    Pad edge of a Vop
215    Modification: 09/20/05.
216=====================================================================*/
217
218void  PaddingEdge(Vop *refVop)
219{
220    UChar *src, *dst;
221    Int i;
222    Int pitch, width, height;
223    ULong temp1, temp2;
224
225    width = refVop->width;
226    height = refVop->height;
227    pitch = refVop->pitch;
228
229    /* pad top */
230    src = refVop->yChan;
231
232    temp1 = *src; /* top-left corner */
233    temp2 = src[width-1]; /* top-right corner */
234    temp1 |= (temp1 << 8);
235    temp1 |= (temp1 << 16);
236    temp2 |= (temp2 << 8);
237    temp2 |= (temp2 << 16);
238
239    dst = src - (pitch << 4);
240
241    *((ULong*)(dst - 16)) = temp1;
242    *((ULong*)(dst - 12)) = temp1;
243    *((ULong*)(dst - 8)) = temp1;
244    *((ULong*)(dst - 4)) = temp1;
245
246    M4VENC_MEMCPY(dst, src, width);
247
248    *((ULong*)(dst += width)) = temp2;
249    *((ULong*)(dst + 4)) = temp2;
250    *((ULong*)(dst + 8)) = temp2;
251    *((ULong*)(dst + 12)) = temp2;
252
253    dst = dst - width - 16;
254
255    i = 15;
256    while (i--)
257    {
258        M4VENC_MEMCPY(dst + pitch, dst, pitch);
259        dst += pitch;
260    }
261
262    /* pad sides */
263    dst += (pitch + 16);
264    src = dst;
265    i = height;
266    while (i--)
267    {
268        temp1 = *src;
269        temp2 = src[width-1];
270        temp1 |= (temp1 << 8);
271        temp1 |= (temp1 << 16);
272        temp2 |= (temp2 << 8);
273        temp2 |= (temp2 << 16);
274
275        *((ULong*)(dst - 16)) = temp1;
276        *((ULong*)(dst - 12)) = temp1;
277        *((ULong*)(dst - 8)) = temp1;
278        *((ULong*)(dst - 4)) = temp1;
279
280        *((ULong*)(dst += width)) = temp2;
281        *((ULong*)(dst + 4)) = temp2;
282        *((ULong*)(dst + 8)) = temp2;
283        *((ULong*)(dst + 12)) = temp2;
284
285        src += pitch;
286        dst = src;
287    }
288
289    /* pad bottom */
290    dst -= 16;
291    i = 16;
292    while (i--)
293    {
294        M4VENC_MEMCPY(dst, dst - pitch, pitch);
295        dst += pitch;
296    }
297
298
299    return ;
300}
301
302/*===================================================================
303    Function:   ComputeMBSum
304    Date:       10/28/2000
305    Purpose:    Compute sum of absolute value (SAV) of blocks in a macroblock
306                in INTRA mode needed for rate control. Thus, instead of
307                computing the SAV, we can compute first order moment or
308                variance .
309
310    11/28/00:    add MMX
311    9/3/01:      do parallel comp for C function.
312===================================================================*/
313void ComputeMBSum_C(UChar *cur, Int lx, MOT *mot_mb)
314{
315    Int j;
316    Int *cInt, *cInt2;
317    Int sad1 = 0, sad2 = 0, sad3 = 0, sad4 = 0;
318    Int tmp, tmp2, mask = 0x00FF00FF;
319
320    cInt = (Int*)cur;   /* make sure this is word-align */
321    cInt2 = (Int*)(cur + (lx << 3));
322    j = 8;
323    while (j--)
324    {
325        tmp = cInt[3];  /* load 4 pixels at a time */
326        tmp2 = tmp & mask;
327        tmp = (tmp >> 8) & mask;
328        tmp += tmp2;
329        sad2 += tmp;
330        tmp = cInt[2];
331        tmp2 = tmp & mask;
332        tmp = (tmp >> 8) & mask;
333        tmp += tmp2;
334        sad2 += tmp;
335        tmp = cInt[1];
336        tmp2 = tmp & mask;
337        tmp = (tmp >> 8) & mask;
338        tmp += tmp2;
339        sad1 += tmp;
340        tmp = *cInt;
341        cInt += (lx >> 2);
342        tmp2 = tmp & mask;
343        tmp = (tmp >> 8) & mask;
344        tmp += tmp2;
345        sad1 += tmp;
346
347        tmp = cInt2[3];
348        tmp2 = tmp & mask;
349        tmp = (tmp >> 8) & mask;
350        tmp += tmp2;
351        sad4 += tmp;
352        tmp = cInt2[2];
353        tmp2 = tmp & mask;
354        tmp = (tmp >> 8) & mask;
355        tmp += tmp2;
356        sad4 += tmp;
357        tmp = cInt2[1];
358        tmp2 = tmp & mask;
359        tmp = (tmp >> 8) & mask;
360        tmp += tmp2;
361        sad3 += tmp;
362        tmp = *cInt2;
363        cInt2 += (lx >> 2);
364        tmp2 = tmp & mask;
365        tmp = (tmp >> 8) & mask;
366        tmp += tmp2;
367        sad3 += tmp;
368    }
369    sad1 += (sad1 << 16);
370    sad2 += (sad2 << 16);
371    sad3 += (sad3 << 16);
372    sad4 += (sad4 << 16);
373    sad1 >>= 16;
374    sad2 >>= 16;
375    sad3 >>= 16;
376    sad4 >>= 16;
377
378    mot_mb[1].sad = sad1;
379    mot_mb[2].sad = sad2;
380    mot_mb[3].sad = sad3;
381    mot_mb[4].sad = sad4;
382    mot_mb[0].sad = sad1 + sad2 + sad3 + sad4;
383
384    return ;
385}
386
387