/******************************************************************************
*
* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
******************************************************************************/
/**
*******************************************************************************
* @file
*  ihevc_padding_atom_intr.c
*
* @brief
*  Contains function definitions for Padding
*
* @author
*  Srinivas T
*
* @par List of Functions:
*   - ihevc_pad_left_luma_ssse3()
*   - ihevc_pad_left_chroma_ssse3()
*   - ihevc_pad_right_luma_ssse3()
*   - ihevc_pad_right_chroma_ssse3()
*
* @remarks
*  None
*
*******************************************************************************
*/
400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar#include <string.h> 420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar#include <assert.h> 430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar#include "ihevc_typedefs.h" 440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar#include "ihevc_func_selector.h" 450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar#include "ihevc_platform_macros.h" 460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar#include "ihevc_mem_fns.h" 470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar#include "ihevc_debug.h" 480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar#include <immintrin.h> 500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar/** 530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar******************************************************************************* 540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @brief 560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* Padding (luma block) at the left of a 2d array 570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @par Description: 590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* The left column of a 2d array is replicated for pad_size times at the left 600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] pu1_src 630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* UWORD8 pointer to the source 
640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] src_strd 660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* integer source stride 670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] ht 690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* integer height of the array 700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] wd 720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* integer width of the array 730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] pad_size 750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* integer -padding size of the array 760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] ht 780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* integer height of the array 790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] wd 810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* integer width of the array 820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @returns 840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @remarks 860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* None 870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar******************************************************************************* 890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar*/ 
900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarvoid ihevc_pad_left_luma_ssse3(UWORD8 *pu1_src, 920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 src_strd, 930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 ht, 940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 pad_size) 950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar{ 960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 row; 970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 i; 980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar UWORD8 *pu1_dst; 990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i const0_16x8b; 1000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar const0_16x8b = _mm_setzero_si128(); 1020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ASSERT(pad_size % 8 == 0); 1040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar for(row = 0; row < ht; row++) 1060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 1070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i src_temp0_16x8b; 1080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp0_16x8b = _mm_loadu_si128((__m128i *)pu1_src); 1100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pu1_dst = pu1_src - pad_size; 1110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp0_16x8b = _mm_shuffle_epi8(src_temp0_16x8b, const0_16x8b); 1120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar for(i = 0; i < pad_size; i += 8) 1130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 1140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish 
Mahendrakar _mm_storel_epi64((__m128i *)(pu1_dst + i), src_temp0_16x8b); 1150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 1160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pu1_src += src_strd; 1170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 1180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar} 1200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar/** 1240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar******************************************************************************* 1250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 1260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @brief 1270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* Padding (chroma block) at the left of a 2d array 1280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 1290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @par Description: 1300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* The left column of a 2d array is replicated for pad_size times at the left 1310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 1320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 1330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] pu1_src 1340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* UWORD8 pointer to the source 1350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 1360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] src_strd 1370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* integer source stride 1380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 1390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] ht 
1400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* integer height of the array 1410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 1420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] wd 1430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* integer width of the array (each colour component) 1440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 1450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] pad_size 1460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* integer -padding size of the array 1470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 1480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] ht 1490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* integer height of the array 1500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 1510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] wd 1520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* integer width of the array 1530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 1540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @returns 1550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 1560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @remarks 1570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* None 1580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 1590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar******************************************************************************* 1600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar*/ 1610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarvoid ihevc_pad_left_chroma_ssse3(UWORD8 *pu1_src, 1630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 src_strd, 1640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 ht, 
1650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 pad_size) 1660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar{ 1670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 row; 1680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 col; 1690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar UWORD8 *pu1_dst; 1700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i const0_16x8b, const1_16x8b; 1710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar const0_16x8b = _mm_setzero_si128(); 1720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar const1_16x8b = _mm_set1_epi8(1); 1730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar const0_16x8b = _mm_unpacklo_epi8(const0_16x8b, const1_16x8b); 1740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ASSERT(pad_size % 8 == 0); 1760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar for(row = 0; row < ht; row++) 1770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 1780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i src_temp0_16x8b; 1790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp0_16x8b = _mm_loadu_si128((__m128i *)pu1_src); 1810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pu1_dst = pu1_src - pad_size; 1820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp0_16x8b = _mm_shuffle_epi8(src_temp0_16x8b, const0_16x8b); 1830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar for(col = 0; col < pad_size; col += 8) 1850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 1860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storel_epi64((__m128i *)(pu1_dst + col), src_temp0_16x8b); 1870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 
1880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pu1_src += src_strd; 1890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 1900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar} 1920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar/** 1960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar******************************************************************************* 1970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 1980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @brief 1990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* Padding (luma block) at the right of a 2d array 2000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 2010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @par Description: 2020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* The right column of a 2d array is replicated for pad_size times at the right 2030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 2040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 2050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] pu1_src 2060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* UWORD8 pointer to the source 2070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 2080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] src_strd 2090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* integer source stride 2100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 2110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] ht 2120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* integer height of the array 
2130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 2140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] wd 2150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* integer width of the array 2160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 2170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] pad_size 2180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* integer -padding size of the array 2190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 2200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] ht 2210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* integer height of the array 2220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 2230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] wd 2240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* integer width of the array 2250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 2260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @returns 2270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 2280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @remarks 2290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* None 2300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 2310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar******************************************************************************* 2320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar*/ 2330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarvoid ihevc_pad_right_luma_ssse3(UWORD8 *pu1_src, 2350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 src_strd, 2360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 ht, 2370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 pad_size) 2380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish 
Mahendrakar{ 2390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 row; 2400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 col; 2410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar UWORD8 *pu1_dst; 2420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i const0_16x8b; 2430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ASSERT(pad_size % 8 == 0); 2450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar for(row = 0; row < ht; row++) 2470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 2480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i src_temp0_16x8b; 2490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp0_16x8b = _mm_loadu_si128((__m128i *)(pu1_src - 1)); 2510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar const0_16x8b = _mm_setzero_si128(); 2520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pu1_dst = pu1_src; 2530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp0_16x8b = _mm_shuffle_epi8(src_temp0_16x8b, const0_16x8b); 2540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar for(col = 0; col < pad_size; col += 8) 2550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 2560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storel_epi64((__m128i *)(pu1_dst + col), src_temp0_16x8b); 2570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 2580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pu1_src += src_strd; 2590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 2600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar} 2620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 
2630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar/** 2660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar******************************************************************************* 2670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 2680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @brief 2690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* Padding (chroma block) at the right of a 2d array 2700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 2710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @par Description: 2720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* The right column of a 2d array is replicated for pad_size times at the right 2730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 2740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 2750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] pu1_src 2760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* UWORD8 pointer to the source 2770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 2780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] src_strd 2790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* integer source stride 2800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 2810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] ht 2820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* integer height of the array 2830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 2840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] wd 2850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* integer width of the array (each colour component) 2860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 2870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish 
Mahendrakar* @param[in] pad_size 2880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* integer -padding size of the array 2890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 2900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] ht 2910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* integer height of the array 2920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 2930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] wd 2940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* integer width of the array 2950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 2960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @returns 2970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 2980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @remarks 2990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* None 3000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 3010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar******************************************************************************* 3020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar*/ 3030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarvoid ihevc_pad_right_chroma_ssse3(UWORD8 *pu1_src, 3050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 src_strd, 3060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 ht, 3070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 pad_size) 3080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar{ 3090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 row; 3100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 col; 3110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar UWORD8 *pu1_dst; 3120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i const0_16x8b, const1_16x8b; 
3130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar const0_16x8b = _mm_setzero_si128(); 3140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar const1_16x8b = _mm_set1_epi8(1); 3150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar const0_16x8b = _mm_unpacklo_epi8(const0_16x8b, const1_16x8b); 3160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ASSERT(pad_size % 8 == 0); 3180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar for(row = 0; row < ht; row++) 3200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 3210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i src_temp0_16x8b; 3220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp0_16x8b = _mm_loadu_si128((__m128i *)(pu1_src - 2)); 3240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pu1_dst = pu1_src; 3250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp0_16x8b = _mm_shuffle_epi8(src_temp0_16x8b, const0_16x8b); 3260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar for(col = 0; col < pad_size; col += 8) 3270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 3280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storel_epi64((__m128i *)(pu1_dst + col), src_temp0_16x8b); 3290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 3300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pu1_src += src_strd; 3320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 3330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar} 3340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 335