1/****************************************************************************** 2* 3* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore 4* 5* Licensed under the Apache License, Version 2.0 (the "License"); 6* you may not use this file except in compliance with the License. 7* You may obtain a copy of the License at: 8* 9* http://www.apache.org/licenses/LICENSE-2.0 10* 11* Unless required by applicable law or agreed to in writing, software 12* distributed under the License is distributed on an "AS IS" BASIS, 13* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14* See the License for the specific language governing permissions and 15* limitations under the License. 16* 17******************************************************************************/ 18/** 19******************************************************************************* 20* @file 21* ihevc_padding_atom_intr.c 22* 23* @brief 24* Contains function definitions for Padding 25* 26* @author 27* Srinivas T 28* 29* @par List of Functions: 30* - ihevc_pad_left_luma_ssse3() 31* - ihevc_pad_left_chroma_ssse3() 32* - ihevc_pad_right_luma_ssse3() 33* - ihevc_pad_right_chroma_ssse3() 34* 35* @remarks 36* None 37* 38******************************************************************************* 39*/ 40 41#include <string.h> 42#include <assert.h> 43#include "ihevc_typedefs.h" 44#include "ihevc_func_selector.h" 45#include "ihevc_platform_macros.h" 46#include "ihevc_mem_fns.h" 47#include "ihevc_debug.h" 48 49#include <immintrin.h> 50 51 52/** 53******************************************************************************* 54* 55* @brief 56* Padding (luma block) at the left of a 2d array 57* 58* @par Description: 59* The left column of a 2d array is replicated for pad_size times at the left 60* 61* 62* @param[in] pu1_src 63* UWORD8 pointer to the source 64* 65* @param[in] src_strd 66* integer source stride 67* 68* @param[in] ht 69* integer height of the array 70* 71* @param[in] wd 72* integer width of the array 73* 74* @param[in] pad_size 75* integer -padding size of the array 76* 77* @param[in] ht 78* integer height of the array 79* 80* @param[in] wd 81* integer width of the array 82* 83* @returns 84* 85* @remarks 86* None 87* 88******************************************************************************* 89*/ 90 91void ihevc_pad_left_luma_ssse3(UWORD8 *pu1_src, 92 WORD32 src_strd, 93 WORD32 ht, 94 WORD32 pad_size) 95{ 96 WORD32 row; 97 WORD32 i; 98 UWORD8 *pu1_dst; 99 __m128i const0_16x8b; 100 101 const0_16x8b = _mm_setzero_si128(); 102 103 ASSERT(pad_size % 8 == 0); 104 105 for(row = 0; row < ht; row++) 106 { 107 __m128i src_temp0_16x8b; 108 109 src_temp0_16x8b = _mm_loadu_si128((__m128i *)pu1_src); 110 pu1_dst = pu1_src - pad_size; 111 src_temp0_16x8b = _mm_shuffle_epi8(src_temp0_16x8b, const0_16x8b); 112 for(i = 0; i < pad_size; i += 8) 113 { 114 _mm_storel_epi64((__m128i *)(pu1_dst + i), src_temp0_16x8b); 115 } 116 pu1_src += src_strd; 117 } 118 119} 120 121 122 123/** 124******************************************************************************* 125* 126* @brief 127* Padding (chroma block) at the left of a 2d array 128* 129* @par Description: 130* The left column of a 2d array is replicated for pad_size times at the left 131* 132* 133* @param[in] pu1_src 134* UWORD8 pointer to the source 135* 136* @param[in] src_strd 137* integer source stride 138* 139* @param[in] ht 140* integer height of the array 141* 142* @param[in] wd 143* integer width of the array (each colour component) 144* 145* @param[in] pad_size 146* integer -padding size of the array 147* 148* @param[in] ht 149* integer height of the array 150* 151* @param[in] wd 152* integer width of the array 153* 154* @returns 155* 156* @remarks 157* None 158* 159******************************************************************************* 160*/ 161 162void ihevc_pad_left_chroma_ssse3(UWORD8 *pu1_src, 163 WORD32 src_strd, 164 WORD32 ht, 165 WORD32 pad_size) 166{ 167 WORD32 row; 168 WORD32 col; 169 UWORD8 *pu1_dst; 170 __m128i const0_16x8b, const1_16x8b; 171 const0_16x8b = _mm_setzero_si128(); 172 const1_16x8b = _mm_set1_epi8(1); 173 const0_16x8b = _mm_unpacklo_epi8(const0_16x8b, const1_16x8b); 174 175 ASSERT(pad_size % 8 == 0); 176 for(row = 0; row < ht; row++) 177 { 178 __m128i src_temp0_16x8b; 179 180 src_temp0_16x8b = _mm_loadu_si128((__m128i *)pu1_src); 181 pu1_dst = pu1_src - pad_size; 182 src_temp0_16x8b = _mm_shuffle_epi8(src_temp0_16x8b, const0_16x8b); 183 184 for(col = 0; col < pad_size; col += 8) 185 { 186 _mm_storel_epi64((__m128i *)(pu1_dst + col), src_temp0_16x8b); 187 } 188 pu1_src += src_strd; 189 } 190 191} 192 193 194 195/** 196******************************************************************************* 197* 198* @brief 199* Padding (luma block) at the right of a 2d array 200* 201* @par Description: 202* The right column of a 2d array is replicated for pad_size times at the right 203* 204* 205* @param[in] pu1_src 206* UWORD8 pointer to the source 207* 208* @param[in] src_strd 209* integer source stride 210* 211* @param[in] ht 212* integer height of the array 213* 214* @param[in] wd 215* integer width of the array 216* 217* @param[in] pad_size 218* integer -padding size of the array 219* 220* @param[in] ht 221* integer height of the array 222* 223* @param[in] wd 224* integer width of the array 225* 226* @returns 227* 228* @remarks 229* None 230* 231******************************************************************************* 232*/ 233 234void ihevc_pad_right_luma_ssse3(UWORD8 *pu1_src, 235 WORD32 src_strd, 236 WORD32 ht, 237 WORD32 pad_size) 238{ 239 WORD32 row; 240 WORD32 col; 241 UWORD8 *pu1_dst; 242 __m128i const0_16x8b; 243 244 ASSERT(pad_size % 8 == 0); 245 246 for(row = 0; row < ht; row++) 247 { 248 __m128i src_temp0_16x8b; 249 250 src_temp0_16x8b = _mm_loadu_si128((__m128i *)(pu1_src - 1)); 251 const0_16x8b = _mm_setzero_si128(); 252 pu1_dst = pu1_src; 253 src_temp0_16x8b = _mm_shuffle_epi8(src_temp0_16x8b, const0_16x8b); 254 for(col = 0; col < pad_size; col += 8) 255 { 256 _mm_storel_epi64((__m128i *)(pu1_dst + col), src_temp0_16x8b); 257 } 258 pu1_src += src_strd; 259 } 260 261} 262 263 264 265/** 266******************************************************************************* 267* 268* @brief 269* Padding (chroma block) at the right of a 2d array 270* 271* @par Description: 272* The right column of a 2d array is replicated for pad_size times at the right 273* 274* 275* @param[in] pu1_src 276* UWORD8 pointer to the source 277* 278* @param[in] src_strd 279* integer source stride 280* 281* @param[in] ht 282* integer height of the array 283* 284* @param[in] wd 285* integer width of the array (each colour component) 286* 287* @param[in] pad_size 288* integer -padding size of the array 289* 290* @param[in] ht 291* integer height of the array 292* 293* @param[in] wd 294* integer width of the array 295* 296* @returns 297* 298* @remarks 299* None 300* 301******************************************************************************* 302*/ 303 304void ihevc_pad_right_chroma_ssse3(UWORD8 *pu1_src, 305 WORD32 src_strd, 306 WORD32 ht, 307 WORD32 pad_size) 308{ 309 WORD32 row; 310 WORD32 col; 311 UWORD8 *pu1_dst; 312 __m128i const0_16x8b, const1_16x8b; 313 const0_16x8b = _mm_setzero_si128(); 314 const1_16x8b = _mm_set1_epi8(1); 315 const0_16x8b = _mm_unpacklo_epi8(const0_16x8b, const1_16x8b); 316 317 ASSERT(pad_size % 8 == 0); 318 319 for(row = 0; row < ht; row++) 320 { 321 __m128i src_temp0_16x8b; 322 323 src_temp0_16x8b = _mm_loadu_si128((__m128i *)(pu1_src - 2)); 324 pu1_dst = pu1_src; 325 src_temp0_16x8b = _mm_shuffle_epi8(src_temp0_16x8b, const0_16x8b); 326 for(col = 0; col < pad_size; col += 8) 327 { 328 _mm_storel_epi64((__m128i *)(pu1_dst + col), src_temp0_16x8b); 329 } 330 331 pu1_src += src_strd; 332 } 333} 334 335