1/*
2 * Copyright (C) 2009 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17/*------------------------------------------------------------------------------
18
19    Table of contents
20
21     1. Include headers
22     2. External compiler flags
23     3. Module defines
24     4. Local function prototypes
25     5. Functions
26          h264bsdWriteMacroblock
27          h264bsdWriteOutputBlocks
28
29------------------------------------------------------------------------------*/
30
31/*------------------------------------------------------------------------------
32    1. Include headers
33------------------------------------------------------------------------------*/
34
#include "h264bsd_image.h"
#include "h264bsd_util.h"
#include "h264bsd_neighbour.h"

#include <stdint.h>
#include <string.h>
38
39/*------------------------------------------------------------------------------
40    2. External compiler flags
41--------------------------------------------------------------------------------
42
43--------------------------------------------------------------------------------
44    3. Module defines
45------------------------------------------------------------------------------*/
46
/* x- and y-coordinates for each block, defined in h264bsd_intra_prediction.c.
 * h264bsdWriteOutputBlocks indexes both tables with the 4x4 block number to
 * obtain that block's horizontal/vertical sample offset inside the
 * macroblock: entries 0..15 are used for luma and entries 0..3 for each
 * chroma component. */
extern const u32 h264bsdBlockX[];
extern const u32 h264bsdBlockY[];

/* clipping table, defined in h264bsd_intra_prediction.c. This file accesses
 * it through (h264bsdClip + 512) so that it can be indexed directly with
 * "prediction + residual", a sum that may be negative. */
extern const u8 h264bsdClip[];
53
54/*------------------------------------------------------------------------------
55    4. Local function prototypes
56------------------------------------------------------------------------------*/
57
58
59
60/*------------------------------------------------------------------------------
61
62    Function: h264bsdWriteMacroblock
63
64        Functional description:
65            Write one macroblock into the image. Both luma and chroma
66            components will be written at the same time.
67
68        Inputs:
69            data    pointer to macroblock data to be written, 256 values for
70                    luma followed by 64 values for both chroma components
71
72        Outputs:
73            image   pointer to the image where the macroblock will be written
74
75        Returns:
76            none
77
78------------------------------------------------------------------------------*/
79#ifndef H264DEC_NEON
80void h264bsdWriteMacroblock(image_t *image, u8 *data)
81{
82
83/* Variables */
84
85    u32 i;
86    u32 width;
87    u32 *lum, *cb, *cr;
88    u32 *ptr;
89    u32 tmp1, tmp2;
90
91/* Code */
92
93    ASSERT(image);
94    ASSERT(data);
95    ASSERT(!((u32)data&0x3));
96
97    width = image->width;
98
99    /*lint -save -e826 lum, cb and cr used to copy 4 bytes at the time, disable
100     * "area too small" info message */
101    lum = (u32*)image->luma;
102    cb = (u32*)image->cb;
103    cr = (u32*)image->cr;
104    ASSERT(!((u32)lum&0x3));
105    ASSERT(!((u32)cb&0x3));
106    ASSERT(!((u32)cr&0x3));
107
108    ptr = (u32*)data;
109
110    width *= 4;
111    for (i = 16; i ; i--)
112    {
113        tmp1 = *ptr++;
114        tmp2 = *ptr++;
115        *lum++ = tmp1;
116        *lum++ = tmp2;
117        tmp1 = *ptr++;
118        tmp2 = *ptr++;
119        *lum++ = tmp1;
120        *lum++ = tmp2;
121        lum += width-4;
122    }
123
124    width >>= 1;
125    for (i = 8; i ; i--)
126    {
127        tmp1 = *ptr++;
128        tmp2 = *ptr++;
129        *cb++ = tmp1;
130        *cb++ = tmp2;
131        cb += width-2;
132    }
133
134    for (i = 8; i ; i--)
135    {
136        tmp1 = *ptr++;
137        tmp2 = *ptr++;
138        *cr++ = tmp1;
139        *cr++ = tmp2;
140        cr += width-2;
141    }
142
143}
144#endif
145#ifndef H264DEC_OMXDL
146/*------------------------------------------------------------------------------
147
148    Function: h264bsdWriteOutputBlocks
149
150        Functional description:
151            Write one macroblock into the image. Prediction for the macroblock
152            and the residual are given separately and will be combined while
153            writing the data to the image
154
155        Inputs:
156            data        pointer to macroblock prediction data, 256 values for
157                        luma followed by 64 values for both chroma components
158            mbNum       number of the macroblock
159            residual    pointer to residual data, 16 16-element arrays for luma
160                        followed by 4 16-element arrays for both chroma
161                        components
162
163        Outputs:
164            image       pointer to the image where the data will be written
165
166        Returns:
167            none
168
169------------------------------------------------------------------------------*/
170
171void h264bsdWriteOutputBlocks(image_t *image, u32 mbNum, u8 *data,
172        i32 residual[][16])
173{
174
175/* Variables */
176
177    u32 i;
178    u32 picWidth, picSize;
179    u8 *lum, *cb, *cr;
180    u8 *imageBlock;
181    u8 *tmp;
182    u32 row, col;
183    u32 block;
184    u32 x, y;
185    i32 *pRes;
186    i32 tmp1, tmp2, tmp3, tmp4;
187    const u8 *clp = h264bsdClip + 512;
188
189/* Code */
190
191    ASSERT(image);
192    ASSERT(data);
193    ASSERT(mbNum < image->width * image->height);
194    ASSERT(!((u32)data&0x3));
195
196    /* Image size in macroblocks */
197    picWidth = image->width;
198    picSize = picWidth * image->height;
199    row = mbNum / picWidth;
200    col = mbNum % picWidth;
201
202    /* Output macroblock position in output picture */
203    lum = (image->data + row * picWidth * 256 + col * 16);
204    cb = (image->data + picSize * 256 + row * picWidth * 64 + col * 8);
205    cr = (cb + picSize * 64);
206
207    picWidth *= 16;
208
209    for (block = 0; block < 16; block++)
210    {
211        x = h264bsdBlockX[block];
212        y = h264bsdBlockY[block];
213
214        pRes = residual[block];
215
216        ASSERT(pRes);
217
218        tmp = data + y*16 + x;
219        imageBlock = lum + y*picWidth + x;
220
221        ASSERT(!((u32)tmp&0x3));
222        ASSERT(!((u32)imageBlock&0x3));
223
224        if (IS_RESIDUAL_EMPTY(pRes))
225        {
226            /*lint -e826 */
227            i32 *in32 = (i32*)tmp;
228            i32 *out32 = (i32*)imageBlock;
229
230            /* Residual is zero => copy prediction block to output */
231            tmp1 = *in32;  in32 += 4;
232            tmp2 = *in32;  in32 += 4;
233            *out32 = tmp1; out32 += picWidth/4;
234            *out32 = tmp2; out32 += picWidth/4;
235            tmp1 = *in32;  in32 += 4;
236            tmp2 = *in32;
237            *out32 = tmp1; out32 += picWidth/4;
238            *out32 = tmp2;
239        }
240        else
241        {
242
243            RANGE_CHECK_ARRAY(pRes, -512, 511, 16);
244
245            /* Calculate image = prediction + residual
246             * Process four pixels in a loop */
247            for (i = 4; i; i--)
248            {
249                tmp1 = tmp[0];
250                tmp2 = *pRes++;
251                tmp3 = tmp[1];
252                tmp1 = clp[tmp1 + tmp2];
253                tmp4 = *pRes++;
254                imageBlock[0] = (u8)tmp1;
255                tmp3 = clp[tmp3 + tmp4];
256                tmp1 = tmp[2];
257                tmp2 = *pRes++;
258                imageBlock[1] = (u8)tmp3;
259                tmp1 = clp[tmp1 + tmp2];
260                tmp3 = tmp[3];
261                tmp4 = *pRes++;
262                imageBlock[2] = (u8)tmp1;
263                tmp3 = clp[tmp3 + tmp4];
264                tmp += 16;
265                imageBlock[3] = (u8)tmp3;
266                imageBlock += picWidth;
267            }
268        }
269
270    }
271
272    picWidth /= 2;
273
274    for (block = 16; block <= 23; block++)
275    {
276        x = h264bsdBlockX[block & 0x3];
277        y = h264bsdBlockY[block & 0x3];
278
279        pRes = residual[block];
280
281        ASSERT(pRes);
282
283        tmp = data + 256;
284        imageBlock = cb;
285
286        if (block >= 20)
287        {
288            imageBlock = cr;
289            tmp += 64;
290        }
291
292        tmp += y*8 + x;
293        imageBlock += y*picWidth + x;
294
295        ASSERT(!((u32)tmp&0x3));
296        ASSERT(!((u32)imageBlock&0x3));
297
298        if (IS_RESIDUAL_EMPTY(pRes))
299        {
300            /*lint -e826 */
301            i32 *in32 = (i32*)tmp;
302            i32 *out32 = (i32*)imageBlock;
303
304            /* Residual is zero => copy prediction block to output */
305            tmp1 = *in32;  in32 += 2;
306            tmp2 = *in32;  in32 += 2;
307            *out32 = tmp1; out32 += picWidth/4;
308            *out32 = tmp2; out32 += picWidth/4;
309            tmp1 = *in32;  in32 += 2;
310            tmp2 = *in32;
311            *out32 = tmp1; out32 += picWidth/4;
312            *out32 = tmp2;
313        }
314        else
315        {
316
317            RANGE_CHECK_ARRAY(pRes, -512, 511, 16);
318
319            for (i = 4; i; i--)
320            {
321                tmp1 = tmp[0];
322                tmp2 = *pRes++;
323                tmp3 = tmp[1];
324                tmp1 = clp[tmp1 + tmp2];
325                tmp4 = *pRes++;
326                imageBlock[0] = (u8)tmp1;
327                tmp3 = clp[tmp3 + tmp4];
328                tmp1 = tmp[2];
329                tmp2 = *pRes++;
330                imageBlock[1] = (u8)tmp3;
331                tmp1 = clp[tmp1 + tmp2];
332                tmp3 = tmp[3];
333                tmp4 = *pRes++;
334                imageBlock[2] = (u8)tmp1;
335                tmp3 = clp[tmp3 + tmp4];
336                tmp += 8;
337                imageBlock[3] = (u8)tmp3;
338                imageBlock += picWidth;
339            }
340        }
341    }
342
343}
344#endif /* H264DEC_OMXDL */
345
346