1/******************************************************************************
2 *
3 * Copyright (C) 2015 The Android Open Source Project
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 *****************************************************************************
18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19*/
20/**
21 *******************************************************************************
22 * @file
23 *  ih264_inter_pred_filters.c
24 *
25 * @brief
26 *  Contains function definitions for inter prediction interpolation filters
27 *
28 * @author
29 *  Ittiam
30 *
31 * @par List of Functions:
32 *  - ih264_inter_pred_luma_copy
33 *  - ih264_interleave_copy
34 *  - ih264_inter_pred_luma_horz
35 *  - ih264_inter_pred_luma_vert
36 *  - ih264_inter_pred_luma_horz_hpel_vert_hpel
37 *  - ih264_inter_pred_luma_horz_qpel
38 *  - ih264_inter_pred_luma_vert_qpel
39 *  - ih264_inter_pred_luma_horz_qpel_vert_qpel
40 *  - ih264_inter_pred_luma_horz_hpel_vert_qpel
41 *  - ih264_inter_pred_luma_horz_qpel_vert_hpel
42 *  - ih264_inter_pred_luma_bilinear
43 *  - ih264_inter_pred_chroma
44 *
45 * @remarks
46 *  None
47 *
48 *******************************************************************************
49 */
50
51/*****************************************************************************/
52/* File Includes                                                             */
53/*****************************************************************************/
54
55/* User include files */
56#include "ih264_typedefs.h"
57#include "ih264_macros.h"
58#include "ih264_platform_macros.h"
59#include "ih264_inter_pred_filters.h"
60
61
62/*****************************************************************************/
63/* Constant Data variables                                                   */
64/*****************************************************************************/
65
66/* coefficients for 6 tap filtering*/
67const WORD32 ih264_g_six_tap[3] ={1,-5,20};
68
69
70/*****************************************************************************/
71/*  Function definitions .                                                   */
72/*****************************************************************************/
73/**
74 *******************************************************************************
75 *
76 * @brief
77 * Interprediction luma function for copy
78 *
79 * @par Description:
80 *    Copies the array of width 'wd' and height 'ht' from the  location pointed
81 *    by 'src' to the location pointed by 'dst'
82 *
83 * @param[in] pu1_src
84 *  UWORD8 pointer to the source
85 *
86 * @param[out] pu1_dst
87 *  UWORD8 pointer to the destination
88 *
89 * @param[in] src_strd
90 *  integer source stride
91 *
92 * @param[in] dst_strd
93 *  integer destination stride
94 *
95 *
96 * @param[in] ht
97 *  integer height of the array
98 *
99 * @param[in] wd
100 *  integer width of the array
101 *
102 * @returns
103 *
104 * @remarks
105 *  None
106 *
107 *******************************************************************************
108 */
109
110void ih264_inter_pred_luma_copy(UWORD8 *pu1_src,
111                                UWORD8 *pu1_dst,
112                                WORD32 src_strd,
113                                WORD32 dst_strd,
114                                WORD32 ht,
115                                WORD32 wd,
116                                UWORD8* pu1_tmp,
117                                WORD32 dydx)
118{
119    WORD32 row, col;
120    UNUSED(pu1_tmp);
121    UNUSED(dydx);
122    for(row = 0; row < ht; row++)
123    {
124        for(col = 0; col < wd; col++)
125        {
126            pu1_dst[col] = pu1_src[col];
127        }
128
129        pu1_src += src_strd;
130        pu1_dst += dst_strd;
131    }
132}
133
134/**
135 *******************************************************************************
136 *
137 * @brief
138 * Fucntion for copying to an interleaved destination
139 *
140 * @par Description:
141 *    Copies the array of width 'wd' and height 'ht' from the  location pointed
142 *    by 'src' to the location pointed by 'dst'
143 *
144 * @param[in] pu1_src
145 *  UWORD8 pointer to the source
146 *
147 * @param[out] pu1_dst
148 *  UWORD8 pointer to the destination
149 *
150 * @param[in] src_strd
151 *  integer source stride
152 *
153 * @param[in] dst_strd
154 *  integer destination stride
155 *
156 * @param[in] ht
157 *  integer height of the array
158 *
159 * @param[in] wd
160 *  integer width of the array
161 *
162 * @returns
163 *
164 * @remarks
165 *  The alternate elements of src will be copied to alternate locations in dsr
166 *  Other locations are not touched
167 *
168 *******************************************************************************
169 */
170void ih264_interleave_copy(UWORD8 *pu1_src,
171                           UWORD8 *pu1_dst,
172                           WORD32 src_strd,
173                           WORD32 dst_strd,
174                           WORD32 ht,
175                           WORD32 wd)
176{
177    WORD32 row, col;
178    wd *= 2;
179
180    for(row = 0; row < ht; row++)
181    {
182        for(col = 0; col < wd; col+=2)
183        {
184            pu1_dst[col] = pu1_src[col];
185        }
186
187        pu1_src += src_strd;
188        pu1_dst += dst_strd;
189    }
190}
191
192/**
193 *******************************************************************************
194 *
195 * @brief
196 *     Interprediction luma filter for horizontal input
197 *
198 * @par Description:
199 *    Applies a 6 tap horizontal filter .The output is  clipped to 8 bits
200 *    sec 8.4.2.2.1 titled "Luma sample interpolation process"
201 *
202 * @param[in] pu1_src
203 *  UWORD8 pointer to the source
204 *
205 * @param[out] pu1_dst
206 *  UWORD8 pointer to the destination
207 *
208 * @param[in] src_strd
209 *  integer source stride
210 *
211 * @param[in] dst_strd
212 *  integer destination stride
213 *
214 * @param[in] ht
215 *  integer height of the array
216 *
217 * @param[in] wd
218 *  integer width of the array
219 *
220 * @returns
221 *
222 * @remarks
223 *  None
224 *
225 *******************************************************************************
226 */
227void ih264_inter_pred_luma_horz(UWORD8 *pu1_src,
228                                UWORD8 *pu1_dst,
229                                WORD32 src_strd,
230                                WORD32 dst_strd,
231                                WORD32 ht,
232                                WORD32 wd,
233                                UWORD8* pu1_tmp,
234                                WORD32 dydx)
235{
236    WORD32 row, col;
237    WORD16 i2_tmp;
238    UNUSED(pu1_tmp);
239    UNUSED(dydx);
240
241    for(row = 0; row < ht; row++)
242    {
243        for(col = 0; col < wd; col++)
244        {
245            i2_tmp = 0;/*ih264_g_six_tap[] is the array containing the filter coeffs*/
246            i2_tmp = ih264_g_six_tap[0] *
247                            (pu1_src[col - 2] + pu1_src[col + 3])
248                     + ih264_g_six_tap[1] *
249                            (pu1_src[col - 1] + pu1_src[col + 2])
250                     + ih264_g_six_tap[2] *
251                            (pu1_src[col] + pu1_src[col + 1]);
252            i2_tmp = (i2_tmp + 16) >> 5;
253            pu1_dst[col] = CLIP_U8(i2_tmp);
254        }
255
256        pu1_src += src_strd;
257        pu1_dst += dst_strd;
258    }
259
260}
261
262/**
263 *******************************************************************************
264 *
265 * @brief
266 *    Interprediction luma filter for vertical input
267 *
268 * @par Description:
269 *   Applies a 6 tap vertical filter.The output is  clipped to 8 bits
270 *    sec 8.4.2.2.1 titled "Luma sample interpolation process"
271 *
272 * @param[in] pu1_src
273 *  UWORD8 pointer to the source
274 *
275 * @param[out] pu1_dst
276 *  UWORD8 pointer to the destination
277 *
278 * @param[in] src_strd
279 *  integer source stride
280 *
281 * @param[in] dst_strd
282 *  integer destination stride
283 *
284 * @param[in] ht
285 *  integer height of the array
286 *
287 * @param[in] wd
288 *  integer width of the array
289 *
290 * @returns
291 *
292 * @remarks
293 *  None
294 *
295 *******************************************************************************
296 */
297void ih264_inter_pred_luma_vert(UWORD8 *pu1_src,
298                                UWORD8 *pu1_dst,
299                                WORD32 src_strd,
300                                WORD32 dst_strd,
301                                WORD32 ht,
302                                WORD32 wd,
303                                UWORD8* pu1_tmp,
304                                WORD32 dydx)
305{
306    WORD32 row, col;
307    WORD16 i2_tmp;
308    UNUSED(pu1_tmp);
309    UNUSED(dydx);
310
311    for(row = 0; row < ht; row++)
312    {
313        for(col = 0; col < wd; col++)
314        {
315            i2_tmp = 0; /*ih264_g_six_tap[] is the array containing the filter coeffs*/
316            i2_tmp = ih264_g_six_tap[0] *
317                            (pu1_src[col - 2 * src_strd] + pu1_src[col + 3 * src_strd])
318                     + ih264_g_six_tap[1] *
319                            (pu1_src[col - 1 * src_strd] + pu1_src[col + 2 * src_strd])
320                     + ih264_g_six_tap[2] *
321                            (pu1_src[col] + pu1_src[col + 1 * src_strd]);
322            i2_tmp = (i2_tmp + 16) >> 5;
323            pu1_dst[col] = CLIP_U8(i2_tmp);
324        }
325        pu1_src += src_strd;
326        pu1_dst += dst_strd;
327    }
328}
329
330/*!
331 **************************************************************************
332 * \if Function name : ih264_inter_pred_luma_horz_hpel_vert_hpel \endif
333 *
334 * \brief
335 *    This function implements a two stage cascaded six tap filter. It
336 *    applies the six tap filter in the horizontal direction on the
337 *    predictor values, followed by applying the same filter in the
338 *    vertical direction on the output of the first stage. The six tap
339 *    filtering operation is described in sec 8.4.2.2.1 titled "Luma sample
340 *    interpolation process"
341 *
342 * \param pu1_src: Pointer to the buffer containing the predictor values.
343 *     pu1_src could point to the frame buffer or the predictor buffer.
344 * \param pu1_dst: Pointer to the destination buffer where the output of
345 *     the six tap filter is stored.
346 * \param ht: Height of the rectangular pixel grid to be interpolated
347 * \param wd: Width of the rectangular pixel grid to be interpolated
348 * \param src_strd: Width of the buffer pointed to by pu1_src.
349 * \param dst_strd: Width of the destination buffer
350 * \param pu1_tmp: temporary buffer.
351 * \param dydx: x and y reference offset for qpel calculations: UNUSED in this function.
352 *
353 * \return
354 *    None.
355 *
356 * \note
357 *    This function takes the 8 bit predictor values, applies the six tap
358 *    filter in the horizontal direction and outputs the result clipped to
359 *    8 bit precision. The input is stored in the buffer pointed to by
360 *    pu1_src while the output is stored in the buffer pointed by pu1_dst.
361 *    Both pu1_src and pu1_dst could point to the same buffer i.e. the
362 *    six tap filter could be done in place.
363 *
364 **************************************************************************
365 */
366void ih264_inter_pred_luma_horz_hpel_vert_hpel(UWORD8 *pu1_src,
367                                               UWORD8 *pu1_dst,
368                                               WORD32 src_strd,
369                                               WORD32 dst_strd,
370                                               WORD32 ht,
371                                               WORD32 wd,
372                                               UWORD8* pu1_tmp,
373                                               WORD32 dydx)
374{
375    WORD32 row, col;
376    WORD32 tmp;
377    WORD16* pi2_pred1_temp;
378    WORD16* pi2_pred1;
379    UNUSED(dydx);
380    pi2_pred1_temp = (WORD16*)pu1_tmp;
381    pi2_pred1_temp += 2;
382    pi2_pred1 = pi2_pred1_temp;
383    for(row = 0; row < ht; row++)
384    {
385        for(col = -2; col < wd + 3; col++)
386        {
387            tmp = 0;/*ih264_g_six_tap[] is the array containing the filter coeffs*/
388            tmp = ih264_g_six_tap[0] *
389                            (pu1_src[col - 2 * src_strd] + pu1_src[col + 3 * src_strd])
390                  + ih264_g_six_tap[1] *
391                            (pu1_src[col - 1 * src_strd] + pu1_src[col + 2 * src_strd])
392                  + ih264_g_six_tap[2] *
393                            (pu1_src[col] + pu1_src[col + 1 * src_strd]);
394            pi2_pred1_temp[col] = tmp;
395        }
396        pu1_src += src_strd;
397        pi2_pred1_temp = pi2_pred1_temp + wd + 5;
398    }
399
400    for(row = 0; row < ht; row++)
401    {
402        for(col = 0; col < wd; col++)
403        {
404            tmp = 0;/*ih264_g_six_tap[] is the array containing the filter coeffs*/
405            tmp = ih264_g_six_tap[0] *
406                            (pi2_pred1[col - 2] + pi2_pred1[col + 3])
407                  + ih264_g_six_tap[1] *
408                            (pi2_pred1[col - 1] + pi2_pred1[col + 2])
409                  + ih264_g_six_tap[2] * (pi2_pred1[col] + pi2_pred1[col + 1]);
410            tmp = (tmp + 512) >> 10;
411            pu1_dst[col] = CLIP_U8(tmp);
412        }
413        pi2_pred1 += (wd + 5);
414        pu1_dst += dst_strd;
415    }
416}
417
418/*!
419 **************************************************************************
420 * \if Function name : ih264_inter_pred_luma_horz_qpel \endif
421 *
422 * \brief
423 *    This routine applies the six tap filter to the predictors in the
424 *    horizontal direction. The six tap filtering operation is described in
425 *    sec 8.4.2.2.1 titled "Luma sample interpolation process"
426 *
427 * \param pu1_src: Pointer to the buffer containing the predictor values.
428 *     pu1_src could point to the frame buffer or the predictor buffer.
429 * \param pu1_dst: Pointer to the destination buffer where the output of
430 *     the six tap filter is stored.
431 * \param ht: Height of the rectangular pixel grid to be interpolated
432 * \param wd: Width of the rectangular pixel grid to be interpolated
433 * \param src_strd: Width of the buffer pointed to by pu1_src.
434 * \param dst_strd: Width of the destination buffer
435 * \param pu1_tmp: temporary buffer: UNUSED in this function
436 * \param dydx: x and y reference offset for qpel calculations.
437 *
438 * \return
439 *    None.
440 *
441 * \note
442 *    This function takes the 8 bit predictor values, applies the six tap
443 *    filter in the horizontal direction and outputs the result clipped to
444 *    8 bit precision. The input is stored in the buffer pointed to by
445 *    pu1_src while the output is stored in the buffer pointed by pu1_dst.
446 *    Both pu1_src and pu1_dst could point to the same buffer i.e. the
447 *    six tap filter could be done in place.
448 *
449 **************************************************************************
450 */
451void ih264_inter_pred_luma_horz_qpel(UWORD8 *pu1_src,
452                                     UWORD8 *pu1_dst,
453                                     WORD32 src_strd,
454                                     WORD32 dst_strd,
455                                     WORD32 ht,
456                                     WORD32 wd,
457                                     UWORD8* pu1_tmp,
458                                     WORD32 dydx)
459{
460    WORD32 row, col;
461    UWORD8 *pu1_pred1;
462    WORD32 x_offset = dydx & 0x3;
463    UNUSED(pu1_tmp);
464    pu1_pred1 = pu1_src + (x_offset >> 1);
465
466    for(row = 0; row < ht; row++)
467    {
468        for(col = 0; col < wd; col++, pu1_src++, pu1_dst++)
469        {
470            WORD16 i2_temp;
471            /* The logic below implements the following equation
472             i2_temp = puc_pred[-2] - 5 * (puc_pred[-1] + puc_pred[2]) +
473             20 * (puc_pred[0] + puc_pred[1]) + puc_pred[3]; */
474            i2_temp = pu1_src[-2] + pu1_src[3]
475                      - (pu1_src[-1] + pu1_src[2])
476                      + ((pu1_src[0] + pu1_src[1] - pu1_src[-1] - pu1_src[2]) << 2)
477                      + ((pu1_src[0] + pu1_src[1]) << 4);
478            i2_temp = (i2_temp + 16) >> 5;
479            i2_temp = CLIP_U8(i2_temp);
480            *pu1_dst = (i2_temp + *pu1_pred1 + 1) >> 1;
481
482            pu1_pred1++;
483        }
484        pu1_dst += dst_strd - wd;
485        pu1_src += src_strd - wd;
486        pu1_pred1 += src_strd - wd;
487    }
488}
489
490/*!
491 **************************************************************************
492 * \if Function name : ih264_inter_pred_luma_vert_qpel \endif
493 *
494 * \brief
495 *    This routine applies the six tap filter to the predictors in the
496 *    vertical direction and interpolates them to obtain pixels at quarter vertical
497 *    positions (0, 1/4) and (0, 3/4). The six tap filtering operation is
498 *    described in sec 8.4.2.2.1 titled "Luma sample interpolation process"
499 *
500 * \param pu1_src: Pointer to the buffer containing the predictor values.
501 *     pu1_src could point to the frame buffer or the predictor buffer.
502 * \param pu1_dst: Pointer to the destination buffer where the output of
503 *     the six tap filter is stored.
504 * \param ht: Height of the rectangular pixel grid to be interpolated
505 * \param wd: Width of the rectangular pixel grid to be interpolated
506 * \param src_strd: Width of the buffer pointed to by puc_pred.
507 * \param dst_strd: Width of the destination buffer
508 * \param pu1_tmp: temporary buffer: UNUSED in this function
509 * \param dydx: x and y reference offset for qpel calculations.
510 *
511 * \return
512 *    void
513 *
514 * \note
515 *    This function takes the 8 bit predictor values, applies the six tap
516 *    filter in the vertical direction and outputs the result clipped to
517 *    8 bit precision. The input is stored in the buffer pointed to by
518 *    puc_pred while the output is stored in the buffer pointed by puc_dest.
519 *    Both puc_pred and puc_dest could point to the same buffer i.e. the
520 *    six tap filter could be done in place.
521 *
522 * \para <title>
523 *    <paragraph>
524 *  ...
525 **************************************************************************
526 */
527void ih264_inter_pred_luma_vert_qpel(UWORD8 *pu1_src,
528                                     UWORD8 *pu1_dst,
529                                     WORD32 src_strd,
530                                     WORD32 dst_strd,
531                                     WORD32 ht,
532                                     WORD32 wd,
533                                     UWORD8* pu1_tmp,
534                                     WORD32 dydx)
535{
536    WORD32 row, col;
537    WORD32 y_offset = dydx >> 2;
538    WORD32 off1, off2, off3;
539    UWORD8 *pu1_pred1;
540    UNUSED(pu1_tmp);
541    y_offset = y_offset & 0x3;
542
543    off1 = src_strd;
544    off2 = src_strd << 1;
545    off3 = off1 + off2;
546
547    pu1_pred1 = pu1_src + (y_offset >> 1) * src_strd;
548
549    for(row = 0; row < ht; row++)
550    {
551        for(col = 0; col < wd; col++, pu1_dst++, pu1_src++, pu1_pred1++)
552        {
553            WORD16 i2_temp;
554            /* The logic below implements the following equation
555             i16_temp = puc_pred[-2*src_strd] + puc_pred[3*src_strd] -
556             5 * (puc_pred[-1*src_strd] + puc_pred[2*src_strd])  +
557             20 * (puc_pred[0] + puc_pred[src_strd]); */
558            i2_temp = pu1_src[-off2] + pu1_src[off3]
559                       - (pu1_src[-off1] + pu1_src[off2])
560                       + ((pu1_src[0] + pu1_src[off1] - pu1_src[-off1] - pu1_src[off2]) << 2)
561                       + ((pu1_src[0] + pu1_src[off1]) << 4);
562            i2_temp = (i2_temp + 16) >> 5;
563            i2_temp = CLIP_U8(i2_temp);
564
565            *pu1_dst = (i2_temp + *pu1_pred1 + 1) >> 1;
566        }
567        pu1_src += src_strd - wd;
568        pu1_pred1 += src_strd - wd;
569        pu1_dst += dst_strd - wd;
570    }
571}
572
573/*!
574 **************************************************************************
575 * \if Function name : ih264_inter_pred_luma_horz_qpel_vert_qpel \endif
576 *
577 * \brief
578 *    This routine applies the six tap filter to the predictors in the
579 *    vertical and horizontal direction and averages them to get pixels at locations
580 *    (1/4,1/4), (1/4, 3/4), (3/4, 1/4) & (3/4, 3/4). The six tap filtering operation
581 *    is described in sec 8.4.2.2.1 titled "Luma sample interpolation process"
582 *
583 * \param pu1_src: Pointer to the buffer containing the predictor values.
584 *     pu1_src could point to the frame buffer or the predictor buffer.
585 * \param pu1_dst: Pointer to the destination buffer where the output of
586 *     the six tap filter is stored.
587 * \param wd: Width of the rectangular pixel grid to be interpolated
588 * \param ht: Height of the rectangular pixel grid to be interpolated
589 * \param src_strd: Width of the buffer pointed to by puc_pred.
590 * \param dst_strd: Width of the destination buffer
591 * \param pu1_tmp: temporary buffer, UNUSED in this function
592 * \param dydx: x and y reference offset for qpel calculations.
593 *
594 * \return
595 *    void
596 *
597 * \note
598 *    This function takes the 8 bit predictor values, applies the six tap
599 *    filter in the vertical direction and outputs the result clipped to
600 *    8 bit precision. The input is stored in the buffer pointed to by
601 *    puc_pred while the output is stored in the buffer pointed by puc_dest.
602 *    Both puc_pred and puc_dest could point to the same buffer i.e. the
603 *    six tap filter could be done in place.
604 *
605 * \para <title>
606 *    <paragraph>
607 *  ...
608 **************************************************************************
609 */
610void ih264_inter_pred_luma_horz_qpel_vert_qpel(UWORD8 *pu1_src,
611                                               UWORD8 *pu1_dst,
612                                               WORD32 src_strd,
613                                               WORD32 dst_strd,
614                                               WORD32 ht,
615                                               WORD32 wd,
616                                               UWORD8* pu1_tmp,
617                                               WORD32 dydx)
618{
619    WORD32 row, col;
620    WORD32 x_offset = dydx & 0x3;
621    WORD32 y_offset = dydx >> 2;
622
623    WORD32 off1, off2, off3;
624    UWORD8* pu1_pred_vert, *pu1_pred_horz;
625    UNUSED(pu1_tmp);
626    y_offset = y_offset & 0x3;
627
628    off1 = src_strd;
629    off2 = src_strd << 1;
630    off3 = off1 + off2;
631
632    pu1_pred_horz = pu1_src + (y_offset >> 1) * src_strd;
633    pu1_pred_vert = pu1_src + (x_offset >> 1);
634
635    for(row = 0; row < ht; row++)
636    {
637        for(col = 0; col < wd;
638                        col++, pu1_dst++, pu1_pred_vert++, pu1_pred_horz++)
639        {
640            WORD16 i2_temp_vert, i2_temp_horz;
641            /* The logic below implements the following equation
642             i2_temp = puc_pred[-2*src_strd] + puc_pred[3*src_strd] -
643             5 * (puc_pred[-1*src_strd] + puc_pred[2*src_strd])  +
644             20 * (puc_pred[0] + puc_pred[src_strd]); */
645            i2_temp_vert = pu1_pred_vert[-off2] + pu1_pred_vert[off3]
646                            - (pu1_pred_vert[-off1] + pu1_pred_vert[off2])
647                            + ((pu1_pred_vert[0] + pu1_pred_vert[off1]
648                                            - pu1_pred_vert[-off1]
649                                            - pu1_pred_vert[off2]) << 2)
650                            + ((pu1_pred_vert[0] + pu1_pred_vert[off1]) << 4);
651            i2_temp_vert = (i2_temp_vert + 16) >> 5;
652            i2_temp_vert = CLIP_U8(i2_temp_vert);
653
654            /* The logic below implements the following equation
655             i16_temp = puc_pred[-2] - 5 * (puc_pred[-1] + puc_pred[2]) +
656             20 * (puc_pred[0] + puc_pred[1]) + puc_pred[3]; */
657            i2_temp_horz = pu1_pred_horz[-2] + pu1_pred_horz[3]
658                            - (pu1_pred_horz[-1] + pu1_pred_horz[2])
659                            + ((pu1_pred_horz[0] + pu1_pred_horz[1]
660                                            - pu1_pred_horz[-1]
661                                            - pu1_pred_horz[2]) << 2)
662                            + ((pu1_pred_horz[0] + pu1_pred_horz[1]) << 4);
663            i2_temp_horz = (i2_temp_horz + 16) >> 5;
664            i2_temp_horz = CLIP_U8(i2_temp_horz);
665            *pu1_dst = (i2_temp_vert + i2_temp_horz + 1) >> 1;
666        }
667        pu1_pred_vert += (src_strd - wd);
668        pu1_pred_horz += (src_strd - wd);
669        pu1_dst += (dst_strd - wd);
670    }
671}
672
673/*!
674 **************************************************************************
675 * \if Function name : ih264_inter_pred_luma_horz_qpel_vert_hpel \endif
676 *
677 * \brief
678 *    This routine applies the six tap filter to the predictors in the vertical
679 *    and horizontal direction to obtain the pixel at (1/2,1/2). It then interpolates
680 *    pixel at (0,1/2) and (1/2,1/2) to obtain pixel at (1/4,1/2). Similarly for (3/4,1/2).
681 *    The six tap filtering operation is described in sec 8.4.2.2.1 titled
682 *    "Luma sample interpolation process"
683 *
684 * \param pu1_src: Pointer to the buffer containing the predictor values.
685 *     pu1_src could point to the frame buffer or the predictor buffer.
686 * \param pu1_dst: Pointer to the destination buffer where the output of
687 *     the six tap filter followed by interpolation is stored.
688 * \param wd: Width of the rectangular pixel grid to be interpolated
689 * \param ht: Height of the rectangular pixel grid to be interpolated
690 * \param src_strd: Width of the buffer pointed to by puc_pred.
691 * \param dst_strd: Width of the destination buffer
692 * \param pu1_tmp: buffer to store temporary output after 1st 6-tap filter.
693 * \param dydx: x and y reference offset for qpel calculations.
694 *
695 * \return
696 *    void
697 *
698 * \note
699 *    This function takes the 8 bit predictor values, applies the six tap
700 *    filter in the vertical direction and outputs the result clipped to
701 *    8 bit precision. The input is stored in the buffer pointed to by
702 *    puc_pred while the output is stored in the buffer pointed by puc_dest.
703 *    Both puc_pred and puc_dest could point to the same buffer i.e. the
704 *    six tap filter could be done in place.
705 *
706 * \para <title>
707 *    <paragraph>
708 *  ...
709 **************************************************************************
710 */
711void ih264_inter_pred_luma_horz_qpel_vert_hpel(UWORD8 *pu1_src,
712                                               UWORD8 *pu1_dst,
713                                               WORD32 src_strd,
714                                               WORD32 dst_strd,
715                                               WORD32 ht,
716                                               WORD32 wd,
717                                               UWORD8* pu1_tmp,
718                                               WORD32 dydx)
719{
720    WORD32 row, col;
721    WORD32 tmp;
722    WORD16* pi2_pred1_temp, *pi2_pred1;
723    UWORD8* pu1_dst_tmp;
724    WORD32 x_offset = dydx & 0x3;
725    WORD16 i2_macro;
726
727    pi2_pred1_temp = (WORD16*)pu1_tmp;
728    pi2_pred1_temp += 2;
729    pi2_pred1 = pi2_pred1_temp;
730    pu1_dst_tmp = pu1_dst;
731
732    for(row = 0; row < ht; row++)
733    {
734        for(col = -2; col < wd + 3; col++)
735        {
736            tmp = 0;/*ih264_g_six_tap[] is the array containing the filter coeffs*/
737            tmp = ih264_g_six_tap[0] *
738                            (pu1_src[col - 2 * src_strd] + pu1_src[col + 3 * src_strd])
739                  + ih264_g_six_tap[1] *
740                            (pu1_src[col - 1 * src_strd] + pu1_src[col + 2 * src_strd])
741                  + ih264_g_six_tap[2] *
742                            (pu1_src[col] + pu1_src[col + 1 * src_strd]);
743            pi2_pred1_temp[col] = tmp;
744        }
745
746        pu1_src += src_strd;
747        pi2_pred1_temp = pi2_pred1_temp + wd + 5;
748    }
749
750    pi2_pred1_temp = pi2_pred1;
751    for(row = 0; row < ht; row++)
752    {
753        for(col = 0; col < wd; col++)
754        {
755            tmp = 0;/*ih264_g_six_tap[] is the array containing the filter coeffs*/
756            tmp = ih264_g_six_tap[0] *
757                            (pi2_pred1[col - 2] + pi2_pred1[col + 3])
758                  + ih264_g_six_tap[1] *
759                            (pi2_pred1[col - 1] + pi2_pred1[col + 2])
760                  + ih264_g_six_tap[2] *
761                            (pi2_pred1[col] + pi2_pred1[col + 1]);
762            tmp = (tmp + 512) >> 10;
763            pu1_dst[col] = CLIP_U8(tmp);
764        }
765        pi2_pred1 += (wd + 5);
766        pu1_dst += dst_strd;
767    }
768
769    pu1_dst = pu1_dst_tmp;
770    pi2_pred1_temp += (x_offset >> 1);
771    for(row = ht; row != 0; row--)
772    {
773        for(col = wd; col != 0; col--, pu1_dst++, pi2_pred1_temp++)
774        {
775            UWORD8 uc_temp;
776            /* Clipping the output of the six tap filter obtained from the
777             first stage of the 2d filter stage */
778            *pi2_pred1_temp = (*pi2_pred1_temp + 16) >> 5;
779            i2_macro = (*pi2_pred1_temp);
780            uc_temp = CLIP_U8(i2_macro);
781            *pu1_dst = (*pu1_dst + uc_temp + 1) >> 1;
782        }
783        pi2_pred1_temp += 5;
784        pu1_dst += dst_strd - wd;
785    }
786}
787
788/*!
789 **************************************************************************
790 * \if Function name : ih264_inter_pred_luma_horz_hpel_vert_qpel \endif
791 *
792 * \brief
793 *    This routine applies the six tap filter to the predictors in the horizontal
794 *    and vertical direction to obtain the pixel at (1/2,1/2). It then interpolates
795 *    pixel at (1/2,0) and (1/2,1/2) to obtain pixel at (1/2,1/4). Similarly for (1/2,3/4).
796 *    The six tap filtering operation is described in sec 8.4.2.2.1 titled
797 *    "Luma sample interpolation process"
798 *
799 * \param pu1_src: Pointer to the buffer containing the predictor values.
800 *     pu1_src could point to the frame buffer or the predictor buffer.
801 * \param pu1_dst: Pointer to the destination buffer where the output of
802 *     the six tap filter followed by interpolation is stored.
803 * \param wd: Width of the rectangular pixel grid to be interpolated
804 * \param ht: Height of the rectangular pixel grid to be interpolated
805 * \param src_strd: Width of the buffer pointed to by puc_pred.
806 * \param dst_strd: Width of the destination buffer
807 * \param pu1_tmp: buffer to store temporary output after 1st 6-tap filter.
808 * \param dydx: x and y reference offset for qpel calculations.
809 *
810 * \return
811 *    void
812 *
813 * \note
814 *    This function takes the 8 bit predictor values, applies the six tap
815 *    filter in the vertical direction and outputs the result clipped to
816 *    8 bit precision. The input is stored in the buffer pointed to by
817 *    puc_pred while the output is stored in the buffer pointed by puc_dest.
818 *    Both puc_pred and puc_dest could point to the same buffer i.e. the
819 *    six tap filter could be done in place.
820 *
821 * \para <title>
822 *    <paragraph>
823 *  ...
824 **************************************************************************
825 */
826void ih264_inter_pred_luma_horz_hpel_vert_qpel(UWORD8 *pu1_src,
827                                               UWORD8 *pu1_dst,
828                                               WORD32 src_strd,
829                                               WORD32 dst_strd,
830                                               WORD32 ht,
831                                               WORD32 wd,
832                                               UWORD8* pu1_tmp,
833                                               WORD32 dydx)
834{
835
836    WORD32 row, col;
837    WORD32 tmp;
838    WORD32 y_offset = dydx >> 2;
839    WORD16* pi2_pred1_temp, *pi2_pred1;
840    UWORD8* pu1_dst_tmp;
841    //WORD32 x_offset = dydx & 0x3;
842    WORD16 i2_macro;
843
844    y_offset = y_offset & 0x3;
845
846    pi2_pred1_temp = (WORD16*)pu1_tmp;
847    pi2_pred1_temp += 2 * wd;
848    pi2_pred1 = pi2_pred1_temp;
849    pu1_dst_tmp = pu1_dst;
850    pu1_src -= 2 * src_strd;
851    for(row = -2; row < ht + 3; row++)
852    {
853        for(col = 0; col < wd; col++)
854        {
855            tmp = 0;/*ih264_g_six_tap[] is the array containing the filter coeffs*/
856            tmp = ih264_g_six_tap[0] * (pu1_src[col - 2] + pu1_src[col + 3])
857                  + ih264_g_six_tap[1] * (pu1_src[col - 1] + pu1_src[col + 2])
858                  + ih264_g_six_tap[2] * (pu1_src[col] + pu1_src[col + 1]);
859            pi2_pred1_temp[col - 2 * wd] = tmp;
860        }
861
862        pu1_src += src_strd;
863        pi2_pred1_temp += wd;
864    }
865    pi2_pred1_temp = pi2_pred1;
866    for(row = 0; row < ht; row++)
867    {
868        for(col = 0; col < wd; col++)
869        {
870            tmp = 0;/*ih264_g_six_tap[] is the array containing the filter coeffs*/
871            tmp = ih264_g_six_tap[0] * (pi2_pred1[col - 2 * wd] + pi2_pred1[col + 3 * wd])
872                  + ih264_g_six_tap[1] * (pi2_pred1[col - 1 * wd] + pi2_pred1[col + 2 * wd])
873                  + ih264_g_six_tap[2] * (pi2_pred1[col] + pi2_pred1[col + 1 * wd]);
874            tmp = (tmp + 512) >> 10;
875            pu1_dst[col] = CLIP_U8(tmp);
876        }
877        pi2_pred1 += wd;
878        pu1_dst += dst_strd;
879    }
880    pu1_dst = pu1_dst_tmp;
881    pi2_pred1_temp += (y_offset >> 1) * wd;
882    for(row = ht; row != 0; row--)
883
884    {
885        for(col = wd; col != 0; col--, pu1_dst++, pi2_pred1_temp++)
886        {
887            UWORD8 u1_temp;
888            /* Clipping the output of the six tap filter obtained from the
889             first stage of the 2d filter stage */
890            *pi2_pred1_temp = (*pi2_pred1_temp + 16) >> 5;
891            i2_macro = (*pi2_pred1_temp);
892            u1_temp = CLIP_U8(i2_macro);
893            *pu1_dst = (*pu1_dst + u1_temp + 1) >> 1;
894        }
895        //pi16_pred1_temp += wd;
896        pu1_dst += dst_strd - wd;
897    }
898}
899
900/**
901 *******************************************************************************
902 *  function:ih264_inter_pred_luma_bilinear
903 *
904 * @brief
905 *    This routine applies the bilinear filter to the predictors .
906 *    The  filtering operation is described in
907 *    sec 8.4.2.2.1 titled "Luma sample interpolation process"
908 *
909 * @par Description:
910\note
911 *     This function is called to obtain pixels lying at the following
912 *    locations (1/4,1), (3/4,1),(1,1/4), (1,3/4) ,(1/4,1/2), (3/4,1/2),(1/2,1/4), (1/2,3/4),(3/4,1/4),(1/4,3/4),(3/4,3/4)&& (1/4,1/4) .
913 *    The function averages the two adjacent values from the two input arrays in horizontal direction.
914 *
915 *
916 * @param[in] pu1_src1:
917 *  UWORD8 Pointer to the buffer containing the first input array.
918 *
919 * @param[in] pu1_src2:
920 *  UWORD8 Pointer to the buffer containing the second input array.
921 *
922 * @param[out] pu1_dst
923 *  UWORD8 pointer to the destination where the output of bilinear filter is stored.
924 *
925 * @param[in] src_strd1
926 *  Stride of the first input buffer
927 *
928 * @param[in] src_strd2
929 *  Stride of the second input buffer
930 *
931 * @param[in] dst_strd
932 *  integer destination stride of pu1_dst
933 *
934 * @param[in] ht
935 *  integer height of the array
936 *
937 * @param[in] wd
938 *  integer width of the array
939 *
940 * @returns
941 *
942 * @remarks
943 *  None
944 *
945 *******************************************************************************
946 */
947void ih264_inter_pred_luma_bilinear(UWORD8 *pu1_src1,
948                                    UWORD8 *pu1_src2,
949                                    UWORD8 *pu1_dst,
950                                    WORD32 src_strd1,
951                                    WORD32 src_strd2,
952                                    WORD32 dst_strd,
953                                    WORD32 ht,
954                                    WORD32 wd)
955{
956    WORD32 row, col;
957    WORD16 i2_tmp;
958
959    for(row = 0; row < ht; row++)
960    {
961        for(col = 0; col < wd; col++)
962        {
963            i2_tmp = pu1_src1[col] + pu1_src2[col];
964            i2_tmp = (i2_tmp + 1) >> 1;
965            pu1_dst[col] = CLIP_U8(i2_tmp);
966        }
967        pu1_src1 += src_strd1;
968        pu1_src2 += src_strd2;
969        pu1_dst += dst_strd;
970    }
971
972}
973
974/**
975 *******************************************************************************
976 *
977 * @brief
978 *    Interprediction chroma filter
979 *
980 * @par Description:
981 *   Applies filtering to chroma samples as mentioned in
982 *    sec 8.4.2.2.2 titled "chroma sample interpolation process"
983 *
984 * @param[in] pu1_src
985 *  UWORD8 pointer to the source containing alternate U and V samples
986 *
987 * @param[out] pu1_dst
988 *  UWORD8 pointer to the destination
989 *
990 * @param[in] src_strd
991 *  integer source stride
992 *
993 * @param[in] dst_strd
994 *  integer destination stride
995 *
996 * @param[in] u1_dx
997 *  dx value where the sample is to be produced(refer sec 8.4.2.2.2 )
998 *
999 * @param[in] u1_dy
1000 *  dy value where the sample is to be produced(refer sec 8.4.2.2.2 )
1001 *
1002 * @param[in] ht
1003 *  integer height of the array
1004 *
1005 * @param[in] wd
1006 *  integer width of the array
1007 *
1008 * @returns
1009 *
1010 * @remarks
1011 *  None
1012 *
1013 *******************************************************************************
1014 */
1015void ih264_inter_pred_chroma(UWORD8 *pu1_src,
1016                             UWORD8 *pu1_dst,
1017                             WORD32 src_strd,
1018                             WORD32 dst_strd,
1019                             WORD32 dx,
1020                             WORD32 dy,
1021                             WORD32 ht,
1022                             WORD32 wd)
1023{
1024    WORD32 row, col;
1025    WORD16 i2_tmp;
1026
1027    for(row = 0; row < ht; row++)
1028    {
1029        for(col = 0; col < 2 * wd; col++)
1030        {
1031            i2_tmp = 0; /* applies equation (8-266) in section 8.4.2.2.2 */
1032            i2_tmp = (8 - dx) * (8 - dy) * pu1_src[col]
1033                     + (dx) * (8 - dy) * pu1_src[col + 2]
1034                     + (8 - dx) * (dy) * (pu1_src + src_strd)[col]
1035                     + (dx) * (dy) * (pu1_src + src_strd)[col + 2];
1036            i2_tmp = (i2_tmp + 32) >> 6;
1037            pu1_dst[col] = CLIP_U8(i2_tmp);
1038        }
1039        pu1_src += src_strd;
1040        pu1_dst += dst_strd;
1041    }
1042}
1043