10d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar/****************************************************************************** 20d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 30d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore 40d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 50d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* Licensed under the Apache License, Version 2.0 (the "License"); 60d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* you may not use this file except in compliance with the License. 70d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* You may obtain a copy of the License at: 80d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 90d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* http://www.apache.org/licenses/LICENSE-2.0 100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* Unless required by applicable law or agreed to in writing, software 120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* distributed under the License is distributed on an "AS IS" BASIS, 130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* See the License for the specific language governing permissions and 150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* limitations under the License. 160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar******************************************************************************/ 180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar/** 190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar******************************************************************************* 200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @file 210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* ihevcd_frm_cvt_x86_intr.c 220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @brief 240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* Platform specific intrinsic implementation of certain functions 250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @author 270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* Ittiam 280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @par List of Functions: 290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* - ihevcd_itrans_recon_dc 300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* - ihevcd_fmt_conv_420sp_to_420p 310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @remarks 330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* None 340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar******************************************************************************* 360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar*/ 370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar#include "string.h" 380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar#include "ihevc_typedefs.h" 390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar#include "ihevc_defs.h" 400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar#include "ihevc_macros.h" 410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar#include "ihevc_platform_macros.h" 420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar#include "ihevcd_function_selector.h" 430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar#include <string.h> 440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar#include <immintrin.h> 450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarvoid ihevcd_fmt_conv_420sp_to_420p_ssse3(UWORD8 *pu1_y_src, 480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar UWORD8 *pu1_uv_src, 490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar UWORD8 *pu1_y_dst, 500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar UWORD8 *pu1_u_dst, 510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar UWORD8 *pu1_v_dst, 520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 wd, 530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 ht, 540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 src_y_strd, 550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 src_uv_strd, 560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 dst_y_strd, 570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 dst_uv_strd, 580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 is_u_first, 590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 disable_luma_copy) 600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar{ 610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar UWORD8 *pu1_src, *pu1_dst; 620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar UWORD8 *pu1_u_src, *pu1_v_src; 630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 num_rows, num_cols, src_strd, dst_strd, cols, rows; 640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 i, j; 650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar cols = 0; 670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pu1_u_src = (UWORD8 *)pu1_uv_src; 680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pu1_v_src = (UWORD8 *)pu1_uv_src + 1; 690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar if(0 == disable_luma_copy) 700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* copy luma */ 720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pu1_src = (UWORD8 *)pu1_y_src; 730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pu1_dst = (UWORD8 *)pu1_y_dst; 740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar num_rows = ht; 760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar num_cols = wd; 770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_strd = src_y_strd; 790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar dst_strd = dst_y_strd; 800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar for(i = 0; i < num_rows; i++) 810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar memcpy(pu1_dst, pu1_src, num_cols); 830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pu1_dst += dst_strd; 840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pu1_src += src_strd; 850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* de-interleave U and V and copy to destination */ 890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar if(!is_u_first) 900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar UWORD8 *temp = pu1_u_dst; 920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pu1_u_dst = pu1_v_dst; 930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pu1_v_dst = temp; 940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pu1_u_src = (UWORD8 *)pu1_uv_src + 1; 960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pu1_v_src = (UWORD8 *)pu1_uv_src; 970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 1000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i src_uv0_8x16b, src_uv1_8x16b, src_u_8x16b, src_v_8x16b; 1010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i temp0_8x16b, temp1_8x16b, alt_first_mask; 1020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar UWORD8 FIRST_ALT_SHUFFLE[16] = { 1040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 0x00, 0x02, 0x04, 0x06, 1050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 0x08, 0x0A, 0x0C, 0x0E, 1060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 0x01, 0x03, 0x05, 0x07, 1070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 0x09, 0x0B, 0x0D, 0x0F }; 1080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar PREFETCH((char const *)(pu1_uv_src + (0 * src_uv_strd)), _MM_HINT_T0) 1100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar PREFETCH((char const *)(pu1_uv_src + (1 * src_uv_strd)), _MM_HINT_T0) 1110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar PREFETCH((char const *)(pu1_uv_src + (2 * src_uv_strd)), _MM_HINT_T0) 1120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar PREFETCH((char const *)(pu1_uv_src + (3 * src_uv_strd)), _MM_HINT_T0) 1130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar PREFETCH((char const *)(pu1_uv_src + (4 * src_uv_strd)), _MM_HINT_T0) 1140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar PREFETCH((char const *)(pu1_uv_src + (5 * src_uv_strd)), _MM_HINT_T0) 1150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar PREFETCH((char const *)(pu1_uv_src + (6 * src_uv_strd)), _MM_HINT_T0) 1160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar PREFETCH((char const *)(pu1_uv_src + (7 * src_uv_strd)), _MM_HINT_T0) 1170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar num_rows = ht >> 1; 1190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar num_cols = wd >> 1; 1200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_strd = src_uv_strd; 1220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar dst_strd = dst_uv_strd; 1230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar alt_first_mask = _mm_loadu_si128((__m128i *)&FIRST_ALT_SHUFFLE[0]); 1250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar if(num_cols > 15) 1270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 1280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar cols = num_cols >> 4; 1290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar for(i = 0; i < (num_rows >> 2); i++) 1310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 1320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar UWORD8 *pu1_uv_src_temp, *pu1_u_dst_temp, *pu1_v_dst_temp; 1330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar PREFETCH((char const *)(pu1_uv_src + (8 * src_strd)), _MM_HINT_T0) 1350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar PREFETCH((char const *)(pu1_uv_src + (9 * src_strd)), _MM_HINT_T0) 1360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar PREFETCH((char const *)(pu1_uv_src + (10 * src_strd)), _MM_HINT_T0) 1370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar PREFETCH((char const *)(pu1_uv_src + (11 * src_strd)), _MM_HINT_T0) 1380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pu1_uv_src_temp = pu1_uv_src; 1400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pu1_u_dst_temp = pu1_u_dst; 1410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pu1_v_dst_temp = pu1_v_dst; 1420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar for(j = 0; j < cols; j++) 1440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 1450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /**** Row 0 ***/ 1470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_uv0_8x16b = _mm_loadu_si128((__m128i *)pu1_uv_src_temp); 1480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_uv1_8x16b = _mm_loadu_si128((__m128i *)(pu1_uv_src_temp + 16)); 1490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp0_8x16b = _mm_shuffle_epi8(src_uv0_8x16b, alt_first_mask); 1510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp1_8x16b = _mm_shuffle_epi8(src_uv1_8x16b, alt_first_mask); 1520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_u_8x16b = _mm_unpacklo_epi64(temp0_8x16b, temp1_8x16b); 1540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_v_8x16b = _mm_unpackhi_epi64(temp0_8x16b, temp1_8x16b); 1550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_u_dst_temp), src_u_8x16b); 1570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_v_dst_temp), src_v_8x16b); 1580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /**** Row 1 ***/ 1600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_uv0_8x16b = _mm_loadu_si128((__m128i *)(pu1_uv_src_temp + (1 * src_strd))); 1610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_uv1_8x16b = _mm_loadu_si128((__m128i *)(pu1_uv_src_temp + (1 * src_strd) + 16)); 1620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp0_8x16b = _mm_shuffle_epi8(src_uv0_8x16b, alt_first_mask); 1640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp1_8x16b = _mm_shuffle_epi8(src_uv1_8x16b, alt_first_mask); 1650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_u_8x16b = _mm_unpacklo_epi64(temp0_8x16b, temp1_8x16b); 1670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_v_8x16b = _mm_unpackhi_epi64(temp0_8x16b, temp1_8x16b); 1680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_u_dst_temp + (1 * dst_strd)), src_u_8x16b); 1700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_v_dst_temp + (1 * dst_strd)), src_v_8x16b); 1710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /**** Row 2 ***/ 1730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_uv0_8x16b = _mm_loadu_si128((__m128i *)(pu1_uv_src_temp + (2 * src_strd))); 1740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_uv1_8x16b = _mm_loadu_si128((__m128i *)(pu1_uv_src_temp + (2 * src_strd) + 16)); 1750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp0_8x16b = _mm_shuffle_epi8(src_uv0_8x16b, alt_first_mask); 1770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp1_8x16b = _mm_shuffle_epi8(src_uv1_8x16b, alt_first_mask); 1780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_u_8x16b = _mm_unpacklo_epi64(temp0_8x16b, temp1_8x16b); 1800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_v_8x16b = _mm_unpackhi_epi64(temp0_8x16b, temp1_8x16b); 1810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_u_dst_temp + (2 * dst_strd)), src_u_8x16b); 1830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_v_dst_temp + (2 * dst_strd)), src_v_8x16b); 1840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /**** Row 3 ***/ 1860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_uv0_8x16b = _mm_loadu_si128((__m128i *)(pu1_uv_src_temp + (3 * src_strd))); 1870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_uv1_8x16b = _mm_loadu_si128((__m128i *)(pu1_uv_src_temp + (3 * src_strd) + 16)); 1880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp0_8x16b = _mm_shuffle_epi8(src_uv0_8x16b, alt_first_mask); 1900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp1_8x16b = _mm_shuffle_epi8(src_uv1_8x16b, alt_first_mask); 1910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_u_8x16b = _mm_unpacklo_epi64(temp0_8x16b, temp1_8x16b); 1930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_v_8x16b = _mm_unpackhi_epi64(temp0_8x16b, temp1_8x16b); 1940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_u_dst_temp + (3 * dst_strd)), src_u_8x16b); 1960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_v_dst_temp + (3 * dst_strd)), src_v_8x16b); 1970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pu1_u_dst_temp += 16; 1990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pu1_v_dst_temp += 16; 2000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pu1_uv_src_temp += 32; 2010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 2020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pu1_u_dst += 4 * dst_strd; 2040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pu1_v_dst += 4 * dst_strd; 2050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pu1_uv_src += 4 * src_strd; 2060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar //pu1_v_src += src_strd; 2070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 2080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar rows = num_rows & 0x3; 2090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar if(rows) 2100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 2110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar for(i = 0; i < rows; i++) 2120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 2130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar UWORD8 *pu1_uv_src_temp, *pu1_u_dst_temp, *pu1_v_dst_temp; 2140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pu1_uv_src_temp = pu1_uv_src; 2160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pu1_u_dst_temp = pu1_u_dst; 2170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pu1_v_dst_temp = pu1_v_dst; 2180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar for(j = 0; j < cols; j++) 2200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 2210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_uv0_8x16b = _mm_loadu_si128((__m128i *)pu1_uv_src_temp); 2230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_uv1_8x16b = _mm_loadu_si128((__m128i *)(pu1_uv_src_temp + 16)); 2240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp0_8x16b = _mm_shuffle_epi8(src_uv0_8x16b, alt_first_mask); 2260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp1_8x16b = _mm_shuffle_epi8(src_uv1_8x16b, alt_first_mask); 2270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_u_8x16b = _mm_unpacklo_epi64(temp0_8x16b, temp1_8x16b); 2290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_v_8x16b = _mm_unpackhi_epi64(temp0_8x16b, temp1_8x16b); 2300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_u_dst_temp), src_u_8x16b); 2320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_v_dst_temp), src_v_8x16b); 2330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pu1_u_dst_temp += 16; 2350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pu1_v_dst_temp += 16; 2360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pu1_uv_src_temp += 32; 2370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 2380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pu1_u_dst += dst_strd; 2400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pu1_v_dst += dst_strd; 2410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pu1_uv_src += src_strd; 2420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 2430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 2440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pu1_u_dst -= (num_rows * dst_strd); 2450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pu1_v_dst -= (num_rows * dst_strd); 2460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar num_cols &= 0x0F; 2470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 2480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar if(num_cols) 2490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 2500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pu1_u_dst += (cols << 4); 2510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pu1_v_dst += (cols << 4); 2520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pu1_u_src += 2 * (cols << 4); 2530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pu1_v_src += 2 * (cols << 4); 2540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar for(i = 0; i < num_rows; i++) 2550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 2560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar for(j = 0; j < num_cols; j++) 2570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 2580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pu1_u_dst[j] = pu1_u_src[j * 2]; 2590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pu1_v_dst[j] = pu1_v_src[j * 2]; 2600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 2610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pu1_u_dst += dst_strd; 2630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pu1_v_dst += dst_strd; 2640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pu1_u_src += src_strd; 2650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pu1_v_src += src_strd; 2660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 2670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 2680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 2690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar return; 2700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar} 271