1/******************************************************************************
2*
3* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
4*
5* Licensed under the Apache License, Version 2.0 (the "License");
6* you may not use this file except in compliance with the License.
7* You may obtain a copy of the License at:
8*
9* http://www.apache.org/licenses/LICENSE-2.0
10*
11* Unless required by applicable law or agreed to in writing, software
12* distributed under the License is distributed on an "AS IS" BASIS,
13* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14* See the License for the specific language governing permissions and
15* limitations under the License.
16*
17******************************************************************************/
18/**
19*******************************************************************************
20* @file
21*  ihevc_padding_atom_intr.c
22*
23* @brief
24*  Contains function definitions for Padding
25*
26* @author
27*  Srinivas T
28*
29* @par List of Functions:
30*   - ihevc_pad_left_luma_ssse3()
31*   - ihevc_pad_left_chroma_ssse3()
32*   - ihevc_pad_right_luma_ssse3()
33*   - ihevc_pad_right_chroma_ssse3()
34*
35* @remarks
36*  None
37*
38*******************************************************************************
39*/
40
41#include <string.h>
42#include <assert.h>
43#include "ihevc_typedefs.h"
44#include "ihevc_func_selector.h"
45#include "ihevc_platform_macros.h"
46#include "ihevc_mem_fns.h"
47#include "ihevc_debug.h"
48
49#include <immintrin.h>
50
51
52/**
53*******************************************************************************
54*
55* @brief
56*   Padding (luma block) at the left of a 2d array
57*
58* @par Description:
59*   The left column of a 2d array is replicated for pad_size times at the left
60*
61*
62* @param[in] pu1_src
63*  UWORD8 pointer to the source
64*
65* @param[in] src_strd
66*  integer source stride
67*
68* @param[in] ht
69*  integer height of the array
70*
71* @param[in] wd
72*  integer width of the array
73*
74* @param[in] pad_size
75*  integer -padding size of the array
76*
77* @param[in] ht
78*  integer height of the array
79*
80* @param[in] wd
81*  integer width of the array
82*
83* @returns
84*
85* @remarks
86*  None
87*
88*******************************************************************************
89*/
90
91void ihevc_pad_left_luma_ssse3(UWORD8 *pu1_src,
92                               WORD32 src_strd,
93                               WORD32 ht,
94                               WORD32 pad_size)
95{
96    WORD32 row;
97    WORD32 i;
98    UWORD8 *pu1_dst;
99    __m128i const0_16x8b;
100
101    const0_16x8b = _mm_setzero_si128();
102
103    ASSERT(pad_size % 8 == 0);
104
105    for(row = 0; row < ht; row++)
106    {
107        __m128i src_temp0_16x8b;
108
109        src_temp0_16x8b =  _mm_loadu_si128((__m128i *)pu1_src);
110        pu1_dst = pu1_src - pad_size;
111        src_temp0_16x8b = _mm_shuffle_epi8(src_temp0_16x8b, const0_16x8b);
112        for(i = 0; i < pad_size; i += 8)
113        {
114            _mm_storel_epi64((__m128i *)(pu1_dst + i), src_temp0_16x8b);
115        }
116        pu1_src += src_strd;
117    }
118
119}
120
121
122
123/**
124*******************************************************************************
125*
126* @brief
127*   Padding (chroma block) at the left of a 2d array
128*
129* @par Description:
130*   The left column of a 2d array is replicated for pad_size times at the left
131*
132*
133* @param[in] pu1_src
134*  UWORD8 pointer to the source
135*
136* @param[in] src_strd
137*  integer source stride
138*
139* @param[in] ht
140*  integer height of the array
141*
142* @param[in] wd
143*  integer width of the array (each colour component)
144*
145* @param[in] pad_size
146*  integer -padding size of the array
147*
148* @param[in] ht
149*  integer height of the array
150*
151* @param[in] wd
152*  integer width of the array
153*
154* @returns
155*
156* @remarks
157*  None
158*
159*******************************************************************************
160*/
161
162void ihevc_pad_left_chroma_ssse3(UWORD8 *pu1_src,
163                                 WORD32 src_strd,
164                                 WORD32 ht,
165                                 WORD32 pad_size)
166{
167    WORD32 row;
168    WORD32 col;
169    UWORD8 *pu1_dst;
170    __m128i const0_16x8b, const1_16x8b;
171    const0_16x8b = _mm_setzero_si128();
172    const1_16x8b = _mm_set1_epi8(1);
173    const0_16x8b = _mm_unpacklo_epi8(const0_16x8b, const1_16x8b);
174
175    ASSERT(pad_size % 8 == 0);
176    for(row = 0; row < ht; row++)
177    {
178        __m128i src_temp0_16x8b;
179
180        src_temp0_16x8b =  _mm_loadu_si128((__m128i *)pu1_src);
181        pu1_dst = pu1_src - pad_size;
182        src_temp0_16x8b = _mm_shuffle_epi8(src_temp0_16x8b, const0_16x8b);
183
184        for(col = 0; col < pad_size; col += 8)
185        {
186            _mm_storel_epi64((__m128i *)(pu1_dst + col), src_temp0_16x8b);
187        }
188        pu1_src += src_strd;
189    }
190
191}
192
193
194
195/**
196*******************************************************************************
197*
198* @brief
199* Padding (luma block) at the right of a 2d array
200*
201* @par Description:
202* The right column of a 2d array is replicated for pad_size times at the right
203*
204*
205* @param[in] pu1_src
206*  UWORD8 pointer to the source
207*
208* @param[in] src_strd
209*  integer source stride
210*
211* @param[in] ht
212*  integer height of the array
213*
214* @param[in] wd
215*  integer width of the array
216*
217* @param[in] pad_size
218*  integer -padding size of the array
219*
220* @param[in] ht
221*  integer height of the array
222*
223* @param[in] wd
224*  integer width of the array
225*
226* @returns
227*
228* @remarks
229*  None
230*
231*******************************************************************************
232*/
233
234void ihevc_pad_right_luma_ssse3(UWORD8 *pu1_src,
235                                WORD32 src_strd,
236                                WORD32 ht,
237                                WORD32 pad_size)
238{
239    WORD32 row;
240    WORD32 col;
241    UWORD8 *pu1_dst;
242    __m128i const0_16x8b;
243
244    ASSERT(pad_size % 8 == 0);
245
246    for(row = 0; row < ht; row++)
247    {
248        __m128i src_temp0_16x8b;
249
250        src_temp0_16x8b =  _mm_loadu_si128((__m128i *)(pu1_src - 1));
251        const0_16x8b = _mm_setzero_si128();
252        pu1_dst = pu1_src;
253        src_temp0_16x8b = _mm_shuffle_epi8(src_temp0_16x8b, const0_16x8b);
254        for(col = 0; col < pad_size; col += 8)
255        {
256            _mm_storel_epi64((__m128i *)(pu1_dst + col), src_temp0_16x8b);
257        }
258        pu1_src += src_strd;
259    }
260
261}
262
263
264
265/**
266*******************************************************************************
267*
268* @brief
269* Padding (chroma block) at the right of a 2d array
270*
271* @par Description:
272* The right column of a 2d array is replicated for pad_size times at the right
273*
274*
275* @param[in] pu1_src
276*  UWORD8 pointer to the source
277*
278* @param[in] src_strd
279*  integer source stride
280*
281* @param[in] ht
282*  integer height of the array
283*
284* @param[in] wd
285*  integer width of the array (each colour component)
286*
287* @param[in] pad_size
288*  integer -padding size of the array
289*
290* @param[in] ht
291*  integer height of the array
292*
293* @param[in] wd
294*  integer width of the array
295*
296* @returns
297*
298* @remarks
299*  None
300*
301*******************************************************************************
302*/
303
304void ihevc_pad_right_chroma_ssse3(UWORD8 *pu1_src,
305                                  WORD32 src_strd,
306                                  WORD32 ht,
307                                  WORD32 pad_size)
308{
309    WORD32 row;
310    WORD32 col;
311    UWORD8 *pu1_dst;
312    __m128i const0_16x8b, const1_16x8b;
313    const0_16x8b = _mm_setzero_si128();
314    const1_16x8b = _mm_set1_epi8(1);
315    const0_16x8b = _mm_unpacklo_epi8(const0_16x8b, const1_16x8b);
316
317    ASSERT(pad_size % 8 == 0);
318
319    for(row = 0; row < ht; row++)
320    {
321        __m128i src_temp0_16x8b;
322
323        src_temp0_16x8b =  _mm_loadu_si128((__m128i *)(pu1_src - 2));
324        pu1_dst = pu1_src;
325        src_temp0_16x8b = _mm_shuffle_epi8(src_temp0_16x8b, const0_16x8b);
326        for(col = 0; col < pad_size; col += 8)
327        {
328            _mm_storel_epi64((__m128i *)(pu1_dst + col), src_temp0_16x8b);
329        }
330
331        pu1_src += src_strd;
332    }
333}
334
335