10d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar/****************************************************************************** 20d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 30d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore 40d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 50d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* Licensed under the Apache License, Version 2.0 (the "License"); 60d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* you may not use this file except in compliance with the License. 70d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* You may obtain a copy of the License at: 80d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 90d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* http://www.apache.org/licenses/LICENSE-2.0 100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* Unless required by applicable law or agreed to in writing, software 120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* distributed under the License is distributed on an "AS IS" BASIS, 130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* See the License for the specific language governing permissions and 150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* limitations under the License. 
160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar******************************************************************************/ 180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar/** 190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar******************************************************************************* 200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @file 210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* ihevc_intra_pred_filters_x86_intr.c 220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @brief 240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* Contains function Definition for intra prediction interpolation filters 250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @author 280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* Ittiam 290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @par List of Functions: 310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* - ihevc_intra_pred_ref_filtering_sse42() 320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* - ihevc_intra_pred_luma_dc_sse42() 330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* - ihevc_intra_pred_luma_horz_sse42() 340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* - ihevc_intra_pred_luma_ver_sse42() 350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* - ihevc_intra_pred_luma_mode_3_to_9_sse42() 360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* - ihevc_intra_pred_luma_mode_11_to_17_sse42() 370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* - ihevc_intra_pred_luma_mode_19_to_25_sse42() 
380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* - ihevc_intra_pred_luma_mode_27_to_33_sse42() 390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @remarks 410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* None 420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar******************************************************************************* 440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar*/ 450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar/*****************************************************************************/ 480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar/* File Includes */ 490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar/*****************************************************************************/ 500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar#include <stdlib.h> 510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar#include "ihevc_typedefs.h" 530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar#include "ihevc_intra_pred.h" 540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar#include "ihevc_macros.h" 550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar#include "ihevc_func_selector.h" 560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar#include "ihevc_platform_macros.h" 570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar#include "ihevc_common_tables.h" 580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar#include "ihevc_defs.h" 590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar#include "ihevc_tables_x86_intr.h" 600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 
#include <immintrin.h>

/****************************************************************************/
/* Constant Macros                                                          */
/****************************************************************************/
#define MAX_CU_SIZE 64
#define BIT_DEPTH   8
#define T32_4NT 128
#define T16_4NT 64


/****************************************************************************/
/* Function Macros                                                          */
/****************************************************************************/
/*
 * Evaluates to 1 when bit number 'x' of 'y' is set and to 0 otherwise.
 * Both arguments and the complete expansion are parenthesized so that the
 * macro can be combined with other operators (==, !, +, |, ...) without the
 * surrounding expression re-associating with parts of the expansion.
 * 'x' is expanded exactly once; it must be a valid shift count for int.
 */
#define GET_BITS(y,x) ((((y) & (1 << (x))) != 0))

/* tables to shuffle 8-bit values */

/*****************************************************************************/
/* global tables Definition                                                  */
Mahendrakar/*****************************************************************************/ 820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar/*****************************************************************************/ 860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar/* Function Definition */ 870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar/*****************************************************************************/ 880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar/** 900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar******************************************************************************* 910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @brief 930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* Intra prediction interpolation filter for ref_filtering 940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @par Description: 970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* Reference DC filtering for neighboring samples dependent on TU size and 980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* mode Refer to section 8.4.4.2.3 in the standard 990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 1000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] pu1_src 1010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* UWORD8 pointer to the source 1020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 1030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[out] pu1_dst 
1040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* UWORD8 pointer to the destination 1050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 1060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] nt 1070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* integer Transform Block size 1080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 1090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] mode 1100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* integer intraprediction mode 1110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 1120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @returns 1130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 1140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @remarks 1150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* None 1160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 1170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar******************************************************************************* 1180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar*/ 1190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarvoid ihevc_intra_pred_ref_filtering_sse42(UWORD8 *pu1_src, 1220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 nt, 1230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar UWORD8 *pu1_dst, 1240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 mode, 1250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 strong_intra_smoothing_enable_flag) 1260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar{ 1270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 filter_flag; 1280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 i; /* Generic indexing variable */ 
1290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 four_nt = 4 * nt; 1300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar UWORD8 au1_flt[(4 * MAX_CU_SIZE) + 1]; 1310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 bi_linear_int_flag = 0; 1320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 abs_cond_left_flag = 0; 1330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 abs_cond_top_flag = 0; 1340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 dc_val = 1 << (BIT_DEPTH - 5); 1350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i src_temp1, src_temp2, src_temp3, src_temp7; 1360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i src_temp4, src_temp5, src_temp6, src_temp8; 1370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar //WORD32 strong_intra_smoothing_enable_flag = 1; 1390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar filter_flag = gau1_intra_pred_ref_filter[mode] & (1 << (CTZ(nt) - 2)); 1430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar if(0 == filter_flag) 1440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 1450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar if(pu1_src == pu1_dst) 1460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 1470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar return; 1480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 1490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar else 1500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 1510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar if(nt == 4) 1520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 
1530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp1 = _mm_loadu_si128((__m128i *)(pu1_src)); 1540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst), src_temp1); 1550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pu1_dst[four_nt] = pu1_src[four_nt]; 1560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 1580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar else if(nt == 8) 1600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 1610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp1 = _mm_loadu_si128((__m128i *)(pu1_src)); 1630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2 = _mm_loadu_si128((__m128i *)(pu1_src + 16)); 1640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst), src_temp1); 1660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + 16), src_temp2); 1670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pu1_dst[four_nt] = pu1_src[four_nt]; 1700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 1710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar else if(nt == 16) 1720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 1730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp1 = _mm_loadu_si128((__m128i *)(pu1_src)); 1750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2 = _mm_loadu_si128((__m128i *)(pu1_src + 16)); 
1760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp3 = _mm_loadu_si128((__m128i *)(pu1_src + 32)); 1770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp4 = _mm_loadu_si128((__m128i *)(pu1_src + 48)); 1780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst), src_temp1); 1800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + 16), src_temp2); 1810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + 32), src_temp3); 1820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + 48), src_temp4); 1830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pu1_dst[four_nt] = pu1_src[four_nt]; 1850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 1860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar else if(nt == 32) 1870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 1880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp1 = _mm_loadu_si128((__m128i *)(pu1_src)); 1900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2 = _mm_loadu_si128((__m128i *)(pu1_src + 16)); 1910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp3 = _mm_loadu_si128((__m128i *)(pu1_src + 32)); 1920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp4 = _mm_loadu_si128((__m128i *)(pu1_src + 48)); 1930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp5 = _mm_loadu_si128((__m128i *)(pu1_src + 64)); 1950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp6 = _mm_loadu_si128((__m128i *)(pu1_src + 80)); 
1960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp7 = _mm_loadu_si128((__m128i *)(pu1_src + 96)); 1970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp8 = _mm_loadu_si128((__m128i *)(pu1_src + 112)); 1980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst), src_temp1); 2000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + 16), src_temp2); 2010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + 32), src_temp3); 2020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + 48), src_temp4); 2030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + 64), src_temp5); 2050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + 80), src_temp6); 2060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + 96), src_temp7); 2070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + 112), src_temp8); 2080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pu1_dst[four_nt] = pu1_src[four_nt]; 2100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 2110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 2130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 2140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar else 2160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 2170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* If strong intra smoothin 
is enabled and transform size is 32 */ 2180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar if((1 == strong_intra_smoothing_enable_flag) && (32 == nt)) 2190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 2200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* Strong Intra Filtering */ 2210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar abs_cond_top_flag = (abs(pu1_src[2 * nt] + pu1_src[4 * nt] 2220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar - (2 * pu1_src[3 * nt]))) < dc_val; 2230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar abs_cond_left_flag = (abs(pu1_src[2 * nt] + pu1_src[0] 2240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar - (2 * pu1_src[nt]))) < dc_val; 2250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar bi_linear_int_flag = ((1 == abs_cond_left_flag) 2270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar && (1 == abs_cond_top_flag)); 2280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 2290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* Extremities Untouched*/ 2300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar au1_flt[0] = pu1_src[0]; 2310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar au1_flt[4 * nt] = pu1_src[4 * nt]; 2320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* Strong filtering of reference samples */ 2340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar if(1 == bi_linear_int_flag) 2350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 2360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar au1_flt[2 * nt] = pu1_src[2 * nt]; 2370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar for(i = 1; i < (2 * nt); i++) 2390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar au1_flt[i] = (((2 
* nt) - i) * pu1_src[0] + i * pu1_src[2 * nt] + 32) >> 6; 2400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar for(i = 1; i < (2 * nt); i++) 2420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar au1_flt[i + (2 * nt)] = (((2 * nt) - i) * pu1_src[2 * nt] + i * pu1_src[4 * nt] + 32) >> 6; 2430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 2440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar else 2450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 2460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i const_value_8x16; 2470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar const_value_8x16 = _mm_set1_epi16(2); 2490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar au1_flt[0] = pu1_src[0]; 2510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar au1_flt[4 * nt] = pu1_src[4 * nt]; 2520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* Perform bilinear filtering of Reference Samples */ 2540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar for(i = 0; i < (four_nt); i += 16) 2550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 2560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp1 = _mm_loadu_si128((__m128i *)(pu1_src + i)); 2570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2 = _mm_srli_si128(src_temp1, 1); 2580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp3 = _mm_srli_si128(src_temp2, 1); 2590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp1 = _mm_cvtepu8_epi16(src_temp1); 2610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2 = _mm_cvtepu8_epi16(src_temp2); 
2620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp3 = _mm_cvtepu8_epi16(src_temp3); 2630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2 = _mm_slli_epi16(src_temp2, 1); 2650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp1 = _mm_add_epi16(src_temp1, src_temp2); 2670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp1 = _mm_add_epi16(src_temp1, src_temp3); 2680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp1 = _mm_add_epi16(src_temp1, const_value_8x16); 2690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp1 = _mm_srai_epi16(src_temp1, 2); 2710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp4 = _mm_loadu_si128((__m128i *)(pu1_src + 8 + i)); 2730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp5 = _mm_srli_si128(src_temp4, 1); 2740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp6 = _mm_srli_si128(src_temp5, 1); 2750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp4 = _mm_cvtepu8_epi16(src_temp4); 2770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp5 = _mm_cvtepu8_epi16(src_temp5); 2780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp6 = _mm_cvtepu8_epi16(src_temp6); 2790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp5 = _mm_slli_epi16(src_temp5, 1); 2810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp4 = _mm_add_epi16(src_temp4, src_temp5); 2830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish 
Mahendrakar src_temp4 = _mm_add_epi16(src_temp4, src_temp6); 2840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp4 = _mm_add_epi16(src_temp4, const_value_8x16); 2850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp4 = _mm_srai_epi16(src_temp4, 2); 2870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* converting 16 bit to 8 bit */ 2890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp1 = _mm_packus_epi16(src_temp1, src_temp4); 2900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(au1_flt + 1 + i), src_temp1); 2920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 2930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar au1_flt[4 * nt] = pu1_src[4 * nt]; 2940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 2950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar if(nt == 4) 2970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 2980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp1 = _mm_loadu_si128((__m128i *)(au1_flt)); 2990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst), src_temp1); 3000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pu1_dst[four_nt] = au1_flt[four_nt]; 3010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 3020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar else if(nt == 8) 3030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 3040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp1 = _mm_loadu_si128((__m128i *)(au1_flt)); 3060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 
src_temp2 = _mm_loadu_si128((__m128i *)(au1_flt + 16)); 3070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst), src_temp1); 3090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + 16), src_temp2); 3100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pu1_dst[four_nt] = au1_flt[four_nt]; 3120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 3130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar else if(nt == 16) 3140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 3150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp1 = _mm_loadu_si128((__m128i *)(au1_flt)); 3170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2 = _mm_loadu_si128((__m128i *)(au1_flt + 16)); 3180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp3 = _mm_loadu_si128((__m128i *)(au1_flt + 32)); 3190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp4 = _mm_loadu_si128((__m128i *)(au1_flt + 48)); 3200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst), src_temp1); 3220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + 16), src_temp2); 3230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + 32), src_temp3); 3240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + 48), src_temp4); 3250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pu1_dst[four_nt] = au1_flt[four_nt]; 3270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 
} 3280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar else if(nt == 32) 3300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 3310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp1 = _mm_loadu_si128((__m128i *)(au1_flt)); 3330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2 = _mm_loadu_si128((__m128i *)(au1_flt + 16)); 3340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp3 = _mm_loadu_si128((__m128i *)(au1_flt + 32)); 3350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp4 = _mm_loadu_si128((__m128i *)(au1_flt + 48)); 3360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp5 = _mm_loadu_si128((__m128i *)(au1_flt + 64)); 3380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp6 = _mm_loadu_si128((__m128i *)(au1_flt + 80)); 3390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp7 = _mm_loadu_si128((__m128i *)(au1_flt + 96)); 3400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp8 = _mm_loadu_si128((__m128i *)(au1_flt + 112)); 3410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst), src_temp1); 3430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + 16), src_temp2); 3440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + 32), src_temp3); 3450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + 48), src_temp4); 3460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + 64), src_temp5); 
3480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + 80), src_temp6); 3490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + 96), src_temp7); 3500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + 112), src_temp8); 3510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pu1_dst[four_nt] = au1_flt[four_nt]; 3530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 3540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 3560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar} 3570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar/** 3610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar******************************************************************************* 3620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 3630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @brief 3640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* Intra prediction interpolation filter for luma dc 3650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 3660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @par Description: 3670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* Intraprediction for DC mode with reference neighboring samples location 3680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* pointed by 'pu1_ref' to the TU block location pointed by 'pu1_dst' Refer 3690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* to section 8.4.4.2.5 in the standard 3700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 
3710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] pu1_ref 3720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* UWORD8 pointer to the reference neighboring samples 3730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 3740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[out] pu1_dst 3750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* UWORD8 pointer to the destination 3760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 3770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] src_strd 3780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* integer source stride 3790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 3800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] dst_strd 3810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* integer destination stride 3820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 3830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] nt 3840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* integer Transform Block size 3850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 3860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] mode 3870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* integer intraprediction mode 3880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 3890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @returns 3900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* None 3910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @remarks 3920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* None 3930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 3940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar******************************************************************************* 3950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar*/ 
3960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarvoid ihevc_intra_pred_luma_dc_sse42(UWORD8 *pu1_ref, 3980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 src_strd, 3990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar UWORD8 *pu1_dst, 4000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 dst_strd, 4010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 nt, 4020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 mode) 4030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar{ 4040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 acc_dc; 4060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 dc_val, two_dc_val, three_dc_val; 4070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 row; 4080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 log2nt = 5; 4090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 two_nt, three_nt; 4100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i src_temp1, src_temp7, src_temp3, src_temp4, src_temp5, src_temp6; 4110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i src_temp8, src_temp9, src_temp10, src_temp2; 4120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i m_zero = _mm_set1_epi32(0); 4130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i sm = _mm_loadu_si128((__m128i *)&IHEVCE_SHUFFLEMASK5[0]); 4140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar UNUSED(src_strd); 4150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar UNUSED(mode); 4160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar switch(nt) 4190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 
{ 4200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar case 32: 4210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar log2nt = 5; 4220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar break; 4230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar case 16: 4240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar log2nt = 4; 4250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar break; 4260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar case 8: 4270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar log2nt = 3; 4280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar break; 4290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar case 4: 4300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar log2nt = 2; 4310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar break; 4320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar default: 4330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar break; 4340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 4350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar two_nt = 2 * nt; 4360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar three_nt = 3 * nt; 4370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar acc_dc = 0; 4390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* Calculate DC value for the transform block */ 4400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar if(nt == 32) 4440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 4450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i temp; 4460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 itr_count; 4470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish 
Mahendrakar 4480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp3 = _mm_loadu_si128((__m128i *)(pu1_ref + nt)); 4490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp4 = _mm_loadu_si128((__m128i *)(pu1_ref + nt + 16)); 4500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp7 = _mm_loadu_si128((__m128i *)(pu1_ref + nt + 32)); 4510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp8 = _mm_loadu_si128((__m128i *)(pu1_ref + nt + 48)); 4520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp3 = _mm_sad_epu8(src_temp3, m_zero); 4540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp4 = _mm_sad_epu8(src_temp4, m_zero); 4550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp7 = _mm_sad_epu8(src_temp7, m_zero); 4560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp8 = _mm_sad_epu8(src_temp8, m_zero); 4570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp4 = _mm_add_epi16(src_temp3, src_temp4); 4590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp8 = _mm_add_epi16(src_temp7, src_temp8); 4600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp4 = _mm_add_epi16(src_temp4, src_temp8); 4610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp4 = _mm_shuffle_epi8(src_temp4, sm); 4630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp4 = _mm_hadd_epi16(src_temp4, m_zero); 4640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar acc_dc = _mm_cvtsi128_si32(src_temp4); 4660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar acc_dc += pu1_ref[three_nt]; 
4680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar acc_dc -= pu1_ref[two_nt]; 4690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* computing acc_dc value */ 4710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar dc_val = (acc_dc + nt) >> (log2nt + 1); 4720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar two_dc_val = 2 * dc_val; 4740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar three_dc_val = 3 * dc_val; 4750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp = _mm_set1_epi8(dc_val); 4770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar for(itr_count = 0; itr_count < 2; itr_count++) 4790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 4800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* pu1_dst[(row * dst_strd) + col] = dc_val;*/ 4810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + ((0) * dst_strd)), temp); 4820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + ((1) * dst_strd)), temp); 4830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + ((2) * dst_strd)), temp); 4840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + ((3) * dst_strd)), temp); 4850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + ((4) * dst_strd)), temp); 4860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + ((5) * dst_strd)), temp); 4870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + ((6) * dst_strd)), temp); 
4880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + ((7) * dst_strd)), temp); 4890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + ((8) * dst_strd)), temp); 4910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + ((9) * dst_strd)), temp); 4920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + ((10) * dst_strd)), temp); 4930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + ((11) * dst_strd)), temp); 4940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + ((12) * dst_strd)), temp); 4950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + ((13) * dst_strd)), temp); 4960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + ((14) * dst_strd)), temp); 4970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + ((15) * dst_strd)), temp); 4980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + 16 + ((0) * dst_strd)), temp); 5000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + 16 + ((1) * dst_strd)), temp); 5010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + 16 + ((2) * dst_strd)), temp); 5020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + 16 + ((3) * dst_strd)), temp); 5030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + 16 + ((4) * dst_strd)), temp); 5040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 
_mm_storeu_si128((__m128i *)(pu1_dst + 16 + ((5) * dst_strd)), temp); 5050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + 16 + ((6) * dst_strd)), temp); 5060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + 16 + ((7) * dst_strd)), temp); 5070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + 16 + ((8) * dst_strd)), temp); 5090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + 16 + ((9) * dst_strd)), temp); 5100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + 16 + ((10) * dst_strd)), temp); 5110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + 16 + ((11) * dst_strd)), temp); 5120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + 16 + ((12) * dst_strd)), temp); 5130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + 16 + ((13) * dst_strd)), temp); 5140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + 16 + ((14) * dst_strd)), temp); 5150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + 16 + ((15) * dst_strd)), temp); 5160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pu1_dst += 16 * dst_strd; 5180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 5190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 5200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar else 5220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 
5240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i zero_8x16b; 5250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i sm1 = _mm_loadu_si128((__m128i *)&IHEVCE_SHUFFLEMASK4[0]); 5260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* DC filtering for the first top row and first left column */ 5280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar zero_8x16b = _mm_set1_epi16(0); 5300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar if(nt == 4) /* nt multiple of 4*/ 5320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 5330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 temp1, temp2, temp3; 5340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp3 = _mm_loadu_si128((__m128i *)(pu1_ref + nt)); 5360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2 = _mm_loadu_si128((__m128i *)(pu1_ref + two_nt + 1)); 5370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp4 = _mm_cvtepu8_epi16(src_temp3); 5390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2 = _mm_cvtepu8_epi16(src_temp2); 5400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp4 = _mm_hadd_epi16(src_temp4, m_zero); 5420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp4 = _mm_hadd_epi16(src_temp4, m_zero); 5430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp4 = _mm_hadd_epi16(src_temp4, m_zero); 5440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar acc_dc = _mm_cvtsi128_si32(src_temp4); 
5460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar acc_dc += pu1_ref[three_nt]; 5470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar acc_dc -= pu1_ref[two_nt]; 5480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar/* computing acc_dc value */ 5500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar dc_val = (acc_dc + nt) >> (log2nt + 1); 5520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar three_dc_val = 3 * dc_val; 5540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* loding 8-bit 16 pixel */ 5560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp1 = _mm_set1_epi16(three_dc_val + 2); 5570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar two_dc_val = 2 * dc_val; 5580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /*(pu1_ref[two_nt + 1 + col] + three_dc_val + 2 */ 5600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2 = _mm_add_epi16(src_temp2, src_temp1); 5610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /*(pu1_ref[two_nt + 1 + col] + three_dc_val + 2) >> 2 */ 5630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2 = _mm_srli_epi16(src_temp2, 2); 5640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2 = _mm_packus_epi16(src_temp2, zero_8x16b); 5660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp1 = _mm_cvtsi128_si32(src_temp2); 5680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 
5690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar *(WORD32 *)(&pu1_dst[0]) = temp1; 5700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* retore first value*/ 5720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pu1_dst[0] = ((pu1_ref[two_nt - 1] + two_dc_val + pu1_ref[two_nt + 1] + 2) 5730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar >> 2); 5740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar for(row = 1; row < nt; row++) 5760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pu1_dst[row * dst_strd] = (pu1_ref[two_nt - 1 - row] + three_dc_val + 2) 5770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar >> 2; 5780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2 = _mm_insert_epi8(src_temp2, dc_val, 0); 5800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2 = _mm_shuffle_epi8(src_temp2, sm1); 5820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp3 = _mm_shuffle_epi8(src_temp2, sm1); 5830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp4 = _mm_shuffle_epi8(src_temp2, sm1); 5840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2 = _mm_insert_epi8(src_temp2, pu1_dst[(1 * dst_strd) + 0], 0); 5860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp3 = _mm_insert_epi8(src_temp3, pu1_dst[(2 * dst_strd) + 0], 0); 5870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp4 = _mm_insert_epi8(src_temp4, pu1_dst[(3 * dst_strd) + 0], 0); 5880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp1 = _mm_cvtsi128_si32(src_temp2); 
5900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp2 = _mm_cvtsi128_si32(src_temp3); 5910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp3 = _mm_cvtsi128_si32(src_temp4); 5920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar *(WORD32 *)(&pu1_dst[(1 * dst_strd)]) = temp1; 5940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar *(WORD32 *)(&pu1_dst[(2 * dst_strd)]) = temp2; 5950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar *(WORD32 *)(&pu1_dst[(3 * dst_strd)]) = temp3; 5960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 5980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar else if(nt == 8) /* if nt%8==0*/ 5990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 6000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp3 = _mm_loadu_si128((__m128i *)(pu1_ref + nt)); 6020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp4 = _mm_sad_epu8(src_temp3, m_zero); 6040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp4 = _mm_shuffle_epi8(src_temp4, sm); 6050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp4 = _mm_hadd_epi16(src_temp4, m_zero); 6060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar acc_dc = _mm_cvtsi128_si32(src_temp4); 6080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar acc_dc += pu1_ref[three_nt]; 6100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar acc_dc -= pu1_ref[two_nt]; 6110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* computing acc_dc value */ 
6130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar dc_val = (acc_dc + nt) >> (log2nt + 1); 6150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar three_dc_val = 3 * dc_val; 6170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp1 = _mm_set1_epi16(three_dc_val + 2); 6180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar two_dc_val = 2 * dc_val; 6190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* loding 8-bit 16 pixel */ 6210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2 = _mm_loadu_si128((__m128i *)(pu1_ref + two_nt + 1)); 6220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2 = _mm_cvtepu8_epi16(src_temp2); 6230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /*(pu1_ref[two_nt + 1 + col] + three_dc_val + 2 */ 6250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2 = _mm_add_epi16(src_temp2, src_temp1); 6260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /*(pu1_ref[two_nt + 1 + col] + three_dc_val + 2)>>2 */ 6280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2 = _mm_srli_epi16(src_temp2, 2); 6290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2 = _mm_packus_epi16(src_temp2, zero_8x16b); 6300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storel_epi64((__m128i *)(pu1_dst), src_temp2); 6320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* retore first value*/ 6340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pu1_dst[0] = ((pu1_ref[two_nt - 1] + 
two_dc_val + pu1_ref[two_nt + 1] + 2) 6350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar >> 2); 6360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar for(row = 1; row < nt; row++) 6380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pu1_dst[row * dst_strd] = (pu1_ref[two_nt - 1 - row] + three_dc_val + 2) 6390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar >> 2; 6400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* Fill the remaining rows with DC value*/ 6420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp1 = _mm_set1_epi8(dc_val); 6440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2 = _mm_set1_epi8(dc_val); 6450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp3 = _mm_set1_epi8(dc_val); 6460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp4 = _mm_set1_epi8(dc_val); 6470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp5 = _mm_set1_epi8(dc_val); 6480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp6 = _mm_set1_epi8(dc_val); 6490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp7 = _mm_set1_epi8(dc_val); 6500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp1 = _mm_insert_epi8(src_temp1, pu1_dst[((1) * dst_strd)], 0); 6520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2 = _mm_insert_epi8(src_temp2, pu1_dst[((2) * dst_strd)], 0); 6530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp3 = _mm_insert_epi8(src_temp3, pu1_dst[((3) * dst_strd)], 0); 6540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp4 = _mm_insert_epi8(src_temp4, pu1_dst[((4) * dst_strd)], 0); 
6550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp5 = _mm_insert_epi8(src_temp5, pu1_dst[((5) * dst_strd)], 0); 6560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp6 = _mm_insert_epi8(src_temp6, pu1_dst[((6) * dst_strd)], 0); 6570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp7 = _mm_insert_epi8(src_temp7, pu1_dst[((7) * dst_strd)], 0); 6580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storel_epi64((__m128i *)(pu1_dst + ((1) * dst_strd)), src_temp1); 6600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storel_epi64((__m128i *)(pu1_dst + ((2) * dst_strd)), src_temp2); 6610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storel_epi64((__m128i *)(pu1_dst + ((3) * dst_strd)), src_temp3); 6620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storel_epi64((__m128i *)(pu1_dst + ((4) * dst_strd)), src_temp4); 6630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storel_epi64((__m128i *)(pu1_dst + ((5) * dst_strd)), src_temp5); 6640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storel_epi64((__m128i *)(pu1_dst + ((6) * dst_strd)), src_temp6); 6650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storel_epi64((__m128i *)(pu1_dst + ((7) * dst_strd)), src_temp7); 6660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 6680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar else if(nt == 16) /* if nt%8==0*/ 6690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 6700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp3 = _mm_loadu_si128((__m128i *)(pu1_ref + nt)); 6720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp4 = _mm_loadu_si128((__m128i *)(pu1_ref + nt + 16)); 
6730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2 = _mm_loadu_si128((__m128i *)(pu1_ref + two_nt + 1)); 6750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp10 = _mm_loadu_si128((__m128i *)(pu1_ref + two_nt + 1 + 8)); 6760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp3 = _mm_sad_epu8(src_temp3, m_zero); 6780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp4 = _mm_sad_epu8(src_temp4, m_zero); 6790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2 = _mm_cvtepu8_epi16(src_temp2); 6810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp10 = _mm_cvtepu8_epi16(src_temp10); 6820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp4 = _mm_add_epi16(src_temp3, src_temp4); 6840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp4 = _mm_shuffle_epi8(src_temp4, sm); 6850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp4 = _mm_hadd_epi16(src_temp4, m_zero); 6860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar acc_dc = _mm_cvtsi128_si32(src_temp4); 6880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar acc_dc += pu1_ref[three_nt]; 6900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar acc_dc -= pu1_ref[two_nt]; 6910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* computing acc_dc value */ 6930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar dc_val = (acc_dc + nt) >> (log2nt + 1); 
6950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar three_dc_val = 3 * dc_val; 6970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp1 = _mm_set1_epi16(three_dc_val + 2); 6980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar two_dc_val = 2 * dc_val; 6990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /*(pu1_ref[two_nt + 1 + col] + three_dc_val + 2 */ 7010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2 = _mm_add_epi16(src_temp2, src_temp1); 7020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp10 = _mm_add_epi16(src_temp10, src_temp1); 7030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /*(pu1_ref[two_nt + 1 + col] + three_dc_val + 2)>>2 */ 7040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2 = _mm_srli_epi16(src_temp2, 2); 7050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp10 = _mm_srli_epi16(src_temp10, 2); 7060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2 = _mm_packus_epi16(src_temp2, src_temp10); 7080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst), src_temp2); 7100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* retore first value*/ 7120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pu1_dst[0] = ((pu1_ref[two_nt - 1] + two_dc_val + pu1_ref[two_nt + 1] + 2) 7130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar >> 2); 7140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar for(row = 1; row < nt; row++) 7160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 
pu1_dst[row * dst_strd] = (pu1_ref[two_nt - 1 - row] + three_dc_val + 2) 7170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar >> 2; 7180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* Fill the remaining rows with DC value*/ 7190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp1 = _mm_set1_epi8(dc_val); 7200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2 = _mm_set1_epi8(dc_val); 7210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp3 = _mm_set1_epi8(dc_val); 7220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp4 = _mm_set1_epi8(dc_val); 7230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp5 = _mm_set1_epi8(dc_val); 7240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp6 = _mm_set1_epi8(dc_val); 7250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp7 = _mm_set1_epi8(dc_val); 7260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar for(row = 1; row < nt; row += 8) 7280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 7290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp1 = _mm_insert_epi8(src_temp1, pu1_dst[((1) * dst_strd)], 0); 7310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2 = _mm_insert_epi8(src_temp2, pu1_dst[((2) * dst_strd)], 0); 7320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp3 = _mm_insert_epi8(src_temp3, pu1_dst[((3) * dst_strd)], 0); 7330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp4 = _mm_insert_epi8(src_temp4, pu1_dst[((4) * dst_strd)], 0); 7340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp5 = _mm_insert_epi8(src_temp5, pu1_dst[((5) * dst_strd)], 0); 7350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp6 = _mm_insert_epi8(src_temp6, pu1_dst[((6) * dst_strd)], 0); 
7360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp7 = _mm_insert_epi8(src_temp7, pu1_dst[((7) * dst_strd)], 0); 7370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + ((1) * dst_strd)), src_temp1); 7390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + ((2) * dst_strd)), src_temp2); 7400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + ((3) * dst_strd)), src_temp3); 7410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + ((4) * dst_strd)), src_temp4); 7420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + ((5) * dst_strd)), src_temp5); 7430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + ((6) * dst_strd)), src_temp6); 7440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + ((7) * dst_strd)), src_temp7); 7450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp1 = _mm_insert_epi8(src_temp1, pu1_dst[((8) * dst_strd)], 0); 7470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2 = _mm_insert_epi8(src_temp2, pu1_dst[((9) * dst_strd)], 0); 7480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp3 = _mm_insert_epi8(src_temp3, pu1_dst[((10) * dst_strd)], 0); 7490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp4 = _mm_insert_epi8(src_temp4, pu1_dst[((11) * dst_strd)], 0); 7500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp5 = _mm_insert_epi8(src_temp5, pu1_dst[((12) * dst_strd)], 0); 7510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp6 = _mm_insert_epi8(src_temp6, pu1_dst[((13) * dst_strd)], 0); 
7520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp7 = _mm_insert_epi8(src_temp7, pu1_dst[((14) * dst_strd)], 0); 7530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + ((8) * dst_strd)), src_temp1); 7550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + ((9) * dst_strd)), src_temp2); 7560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + ((10) * dst_strd)), src_temp3); 7570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp1 = _mm_insert_epi8(src_temp1, pu1_dst[((15) * dst_strd)], 0); 7590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + ((11) * dst_strd)), src_temp4); 7610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + ((12) * dst_strd)), src_temp5); 7620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + ((13) * dst_strd)), src_temp6); 7630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + ((14) * dst_strd)), src_temp7); 7640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + ((15) * dst_strd)), src_temp1); 7660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 7680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 7700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar else if(nt == 32) /* if nt%8==0*/ 7710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 
7720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i src_temp11, src_temp12, src_temp13, src_temp14, src_temp15, src_temp16, src_temp17; 7740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp3 = _mm_loadu_si128((__m128i *)(pu1_ref + nt)); 7760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp4 = _mm_loadu_si128((__m128i *)(pu1_ref + nt + 16)); 7770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp7 = _mm_loadu_si128((__m128i *)(pu1_ref + nt + 32)); 7780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp8 = _mm_loadu_si128((__m128i *)(pu1_ref + nt + 48)); 7790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* loding 8-bit 16 pixel */ 7810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2 = _mm_loadu_si128((__m128i *)(pu1_ref + two_nt + 1)); 7820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp6 = _mm_loadu_si128((__m128i *)(pu1_ref + two_nt + 1 + 8)); 7830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp9 = _mm_loadu_si128((__m128i *)(pu1_ref + two_nt + 1 + 16)); 7840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp10 = _mm_loadu_si128((__m128i *)(pu1_ref + two_nt + 1 + 24)); 7850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp3 = _mm_sad_epu8(src_temp3, m_zero); 7870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp4 = _mm_sad_epu8(src_temp4, m_zero); 7880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp7 = _mm_sad_epu8(src_temp7, m_zero); 7890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp8 = _mm_sad_epu8(src_temp8, m_zero); 7900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 
7910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2 = _mm_cvtepu8_epi16(src_temp2); 7920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp6 = _mm_cvtepu8_epi16(src_temp6); 7930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp9 = _mm_cvtepu8_epi16(src_temp9); 7940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp10 = _mm_cvtepu8_epi16(src_temp10); 7950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp4 = _mm_add_epi16(src_temp3, src_temp4); 7970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp8 = _mm_add_epi16(src_temp7, src_temp8); 7980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp4 = _mm_add_epi16(src_temp4, src_temp8); 7990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp4 = _mm_shuffle_epi8(src_temp4, sm); 8010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp4 = _mm_hadd_epi16(src_temp4, m_zero); 8020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar acc_dc = _mm_cvtsi128_si32(src_temp4); 8040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar acc_dc += pu1_ref[three_nt]; 8060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar acc_dc -= pu1_ref[two_nt]; 8070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* computing acc_dc value */ 8090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar dc_val = (acc_dc + nt) >> (log2nt + 1); 8110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar three_dc_val = 3 * dc_val; 
8130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp1 = _mm_set1_epi16(three_dc_val + 2); 8140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar two_dc_val = 2 * dc_val; 8150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /*(pu1_ref[two_nt + 1 + col] + three_dc_val + 2 */ 8170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2 = _mm_add_epi16(src_temp2, src_temp1); 8180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2 = _mm_add_epi16(src_temp6, src_temp1); 8190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2 = _mm_add_epi16(src_temp9, src_temp1); 8200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2 = _mm_add_epi16(src_temp10, src_temp1); 8210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /*(pu1_ref[two_nt + 1 + col] + three_dc_val + 2)>>2 */ 8230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2 = _mm_srli_epi16(src_temp2, 2); 8240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp6 = _mm_srli_epi16(src_temp6, 2); 8250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp9 = _mm_srli_epi16(src_temp9, 2); 8260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp10 = _mm_srli_epi16(src_temp10, 2); 8270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2 = _mm_packus_epi16(src_temp2, src_temp6); 8290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp10 = _mm_packus_epi16(src_temp9, src_temp10); 8300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst), src_temp2); 8320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + 16), src_temp10); 
8330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* retore first value*/ 8350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pu1_dst[0] = ((pu1_ref[two_nt - 1] + two_dc_val + pu1_ref[two_nt + 1] + 2) 8360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar >> 2); 8370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar for(row = 1; row < nt; row++) 8390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pu1_dst[row * dst_strd] = (pu1_ref[two_nt - 1 - row] + three_dc_val + 2) 8400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar >> 2; 8410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* Fill the remaining rows with DC value*/ 8420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp1 = _mm_insert_epi8(src_temp1, dc_val, 0); 8430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2 = src_temp1; 8450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp3 = src_temp1; 8460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp4 = src_temp1; 8470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp5 = src_temp1; 8480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp6 = src_temp1; 8490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp7 = src_temp1; 8500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp12 = src_temp1; 8520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp13 = src_temp1; 8530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp14 = src_temp1; 8540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp15 = src_temp1; 8550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp16 = src_temp1; 
8560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp17 = src_temp1; 8570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp11 = src_temp1; 8580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar for(row = 1; row < nt; row++) 8600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 8610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp1 = _mm_insert_epi8(src_temp1, pu1_dst[((1) * dst_strd)], 0); 8620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2 = _mm_insert_epi8(src_temp2, pu1_dst[((2) * dst_strd)], 0); 8630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp3 = _mm_insert_epi8(src_temp3, pu1_dst[((3) * dst_strd)], 0); 8640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp4 = _mm_insert_epi8(src_temp4, pu1_dst[((4) * dst_strd)], 0); 8650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp5 = _mm_insert_epi8(src_temp5, pu1_dst[((5) * dst_strd)], 0); 8660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp6 = _mm_insert_epi8(src_temp6, pu1_dst[((6) * dst_strd)], 0); 8670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp7 = _mm_insert_epi8(src_temp7, pu1_dst[((7) * dst_strd)], 0); 8680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + (row * dst_strd)), src_temp1); 8700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + (row * dst_strd) + 16), src_temp11); 8710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + ((row + 1) * dst_strd)), src_temp2); 8720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + ((row + 1) * dst_strd) + 16), src_temp12); 8730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 
_mm_storeu_si128((__m128i *)(pu1_dst + ((row + 2) * dst_strd)), src_temp3); 8740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + ((row + 2) * dst_strd) + 16), src_temp13); 8750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + ((row + 3) * dst_strd)), src_temp4); 8770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + ((row + 3) * dst_strd) + 16), src_temp14); 8780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + ((row + 4) * dst_strd)), src_temp5); 8790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + ((row + 5) * dst_strd) + 16), src_temp15); 8800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + ((row + 5) * dst_strd)), src_temp6); 8810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + ((row + 5) * dst_strd) + 16), src_temp16); 8820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + ((row + 6) * dst_strd)), src_temp7); 8830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + ((row + 6) * dst_strd) + 16), src_temp17); 8840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 8870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 8890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 8900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar} 8910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar/** 
8930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar******************************************************************************* 8940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 8950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @brief 8960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* Intra prediction interpolation filter for the luma horizontal mode. 8970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 8980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @par Description: 8990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* Horizontal intra prediction (mode 10) with reference samples location 9000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* pointed by 'pu1_ref' to the TU block location pointed by 'pu1_dst'. Refer 9010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* to section 8.4.4.2.6 in the standard (Special case) 9020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 9030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] pu1_ref 9040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* UWORD8 pointer to the reference samples 9050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 9060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[out] pu1_dst 9070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* UWORD8 pointer to the destination 9080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 9090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] src_strd 9100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* integer source stride (unused) 9110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 9120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] dst_strd 9130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* integer destination stride 9140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 9150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish
Mahendrakar* @param[in] nt 9160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* integer Transform Block size 9170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 9180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] mode 9190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* integer intraprediction mode 9200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 9210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @returns 9220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 9230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @remarks 9240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* None 9250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 9260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar******************************************************************************* 9270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar*/ 9280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 9290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarvoid ihevc_intra_pred_luma_horz_sse42(UWORD8 *pu1_ref, 9300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 src_strd, 9310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar UWORD8 *pu1_dst, 9320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 dst_strd, 9330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 nt, 9340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 mode) 9350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar{ 9360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 9370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 row; 9380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 two_nt; 9390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar UNUSED(src_strd); 9400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar UNUSED(mode); 
9410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 9420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar two_nt = 2 * nt; 9430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 9440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 9450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar if(nt == 32) 9460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 9470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i src_temp1, src_temp2, src_temp3, src_temp4, src_temp5, src_temp6, src_temp7, src_temp8; 9480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i src_temp9, src_temp10, src_temp11, src_temp12, src_temp13, src_temp14, src_temp15, src_temp16; 9490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i sm = _mm_loadu_si128((__m128i *)&IHEVCE_SHUFFLEMASK4[0]); 9500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 9510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar for(row = 0; row < nt; row += 16) 9520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 9530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 9540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp1 = _mm_loadu_si128((__m128i *)(pu1_ref + two_nt - 1 - row - 15)); 9550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 9560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2 = _mm_srli_si128(src_temp1, 1); 9570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp3 = _mm_srli_si128(src_temp1, 2); 9580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp4 = _mm_srli_si128(src_temp1, 3); 9590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp5 = _mm_srli_si128(src_temp1, 4); 9600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp6 = _mm_srli_si128(src_temp1, 5); 9610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp7 = _mm_srli_si128(src_temp1, 6); 
9620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp8 = _mm_srli_si128(src_temp1, 7); 9630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 9640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp9 = _mm_srli_si128(src_temp1, 8); 9650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp10 = _mm_srli_si128(src_temp1, 9); 9660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp11 = _mm_srli_si128(src_temp1, 10); 9670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp12 = _mm_srli_si128(src_temp1, 11); 9680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp13 = _mm_srli_si128(src_temp1, 12); 9690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp14 = _mm_srli_si128(src_temp1, 13); 9700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp15 = _mm_srli_si128(src_temp1, 14); 9710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp16 = _mm_srli_si128(src_temp1, 15); 9720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 9730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp8 = _mm_shuffle_epi8(src_temp8, sm); 9740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp7 = _mm_shuffle_epi8(src_temp7, sm); 9750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp6 = _mm_shuffle_epi8(src_temp6, sm); 9760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp5 = _mm_shuffle_epi8(src_temp5, sm); 9770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp4 = _mm_shuffle_epi8(src_temp4, sm); 9780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp3 = _mm_shuffle_epi8(src_temp3, sm); 9790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2 = _mm_shuffle_epi8(src_temp2, sm); 9800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp1 = _mm_shuffle_epi8(src_temp1, sm); 9810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 
9820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp16 = _mm_shuffle_epi8(src_temp16, sm); 9830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp15 = _mm_shuffle_epi8(src_temp15, sm); 9840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp14 = _mm_shuffle_epi8(src_temp14, sm); 9850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp13 = _mm_shuffle_epi8(src_temp13, sm); 9860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp12 = _mm_shuffle_epi8(src_temp12, sm); 9870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp11 = _mm_shuffle_epi8(src_temp11, sm); 9880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp10 = _mm_shuffle_epi8(src_temp10, sm); 9890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp9 = _mm_shuffle_epi8(src_temp9, sm); 9900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 9910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + ((row + 0) * dst_strd)), src_temp16); 9920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + ((row + 1) * dst_strd)), src_temp15); 9930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + ((row + 2) * dst_strd)), src_temp14); 9940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + ((row + 3) * dst_strd)), src_temp13); 9950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + ((row + 4) * dst_strd)), src_temp12); 9960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + ((row + 5) * dst_strd)), src_temp11); 9970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + ((row + 6) * dst_strd)), src_temp10); 9980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i 
*)(pu1_dst + ((row + 7) * dst_strd)), src_temp9); 9990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 10000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + ((row + 8) * dst_strd)), src_temp8); 10010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + ((row + 9) * dst_strd)), src_temp7); 10020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + ((row + 10) * dst_strd)), src_temp6); 10030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + ((row + 11) * dst_strd)), src_temp5); 10040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + ((row + 12) * dst_strd)), src_temp4); 10050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + ((row + 13) * dst_strd)), src_temp3); 10060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + ((row + 14) * dst_strd)), src_temp2); 10070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + ((row + 15) * dst_strd)), src_temp1); 10080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 10090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + 16 + ((row + 0) * dst_strd)), src_temp16); 10100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + 16 + ((row + 1) * dst_strd)), src_temp15); 10110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + 16 + ((row + 2) * dst_strd)), src_temp14); 10120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + 16 + ((row + 3) * dst_strd)), src_temp13); 10130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + 16 + ((row + 4) * 
dst_strd)), src_temp12); 10140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + 16 + ((row + 5) * dst_strd)), src_temp11); 10150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + 16 + ((row + 6) * dst_strd)), src_temp10); 10160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + 16 + ((row + 7) * dst_strd)), src_temp9); 10170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 10180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + 16 + ((row + 8) * dst_strd)), src_temp8); 10190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + 16 + ((row + 9) * dst_strd)), src_temp7); 10200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + 16 + ((row + 10) * dst_strd)), src_temp6); 10210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + 16 + ((row + 11) * dst_strd)), src_temp5); 10220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + 16 + ((row + 12) * dst_strd)), src_temp4); 10230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + 16 + ((row + 13) * dst_strd)), src_temp3); 10240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + 16 + ((row + 14) * dst_strd)), src_temp2); 10250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + 16 + ((row + 15) * dst_strd)), src_temp1); 10260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 10270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 10280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 10290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 10300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 
10310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 10320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar else 10330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 10340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 10350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i src_temp1, src_temp2, src_temp3, src_temp4, src_temp5, src_temp6; 10360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i src_temp10, zero_8x16b, src_temp7; 10370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 10380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* DC filtering for the first top row and first left column */ 10390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 10400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar zero_8x16b = _mm_set1_epi16(0); 10410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 10420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /*Filtering done for the 1st row */ 10430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 10440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2 = _mm_set1_epi16(pu1_ref[two_nt - 1]); 10450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp10 = _mm_set1_epi16(pu1_ref[two_nt]); 10460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 10470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* loding 8-bit 16 pixels */ 10480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp4 = _mm_loadu_si128((__m128i *)(pu1_ref + two_nt + 1)); 10490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 10500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp4 = _mm_cvtepu8_epi16(src_temp4); 10510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 10520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /*(pu1_ref[two_nt + 1 + col] - pu1_ref[two_nt])*/ 10530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp3 = 
_mm_sub_epi16(src_temp4, src_temp10); 10540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 10550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* ((pu1_ref[two_nt + 1 + col] - pu1_ref[two_nt]) >> 1)*/ 10560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp3 = _mm_srai_epi16(src_temp3, 1); 10570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 10580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* pu1_ref[two_nt - 1]+((pu1_ref[two_nt + 1 + col] - pu1_ref[two_nt]) >> 1)*/ 10590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp3 = _mm_add_epi16(src_temp2, src_temp3); 10600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 10610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar if(nt == 4) 10620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 10630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar int temp1, temp2, temp3; 10640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp3 = _mm_packus_epi16(src_temp3, zero_8x16b); 10650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp1 = _mm_cvtsi128_si32(src_temp3); 10660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 10670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar *(WORD32 *)(&pu1_dst[0]) = temp1; 10680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 10690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2 = _mm_set1_epi8(pu1_ref[two_nt - 2]); 10700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp3 = _mm_set1_epi8(pu1_ref[two_nt - 3]); 10710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp4 = _mm_set1_epi8(pu1_ref[two_nt - 4]); 10720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 10730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp1 = _mm_cvtsi128_si32(src_temp2); 10740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp2 = _mm_cvtsi128_si32(src_temp3); 
10750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp3 = _mm_cvtsi128_si32(src_temp4); 10760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 10770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /*pu1_dst[(row * dst_strd) + col] = pu1_ref[two_nt - 1 - row];*/ 10780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar *(WORD32 *)(&pu1_dst[(1 * dst_strd)]) = temp1; 10790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar *(WORD32 *)(&pu1_dst[(2 * dst_strd)]) = temp2; 10800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar *(WORD32 *)(&pu1_dst[(3 * dst_strd)]) = temp3; 10810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 10820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 10830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar else if(nt == 8) 10840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 10850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp10 = _mm_packus_epi16(src_temp3, zero_8x16b); 10860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 10870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 10880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp1 = _mm_set1_epi8(pu1_ref[two_nt - 2]); 10890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2 = _mm_set1_epi8(pu1_ref[two_nt - 3]); 10900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp3 = _mm_set1_epi8(pu1_ref[two_nt - 4]); 10910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp4 = _mm_set1_epi8(pu1_ref[two_nt - 5]); 10920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp5 = _mm_set1_epi8(pu1_ref[two_nt - 6]); 10930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp6 = _mm_set1_epi8(pu1_ref[two_nt - 7]); 10940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp7 = _mm_set1_epi8(pu1_ref[two_nt - 8]); 10950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 
10960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storel_epi64((__m128i *)(pu1_dst), src_temp10); 10970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 10980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /*pu1_dst[(row * dst_strd) + col] = pu1_ref[two_nt - 1 - row];*/ 10990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storel_epi64((__m128i *)(pu1_dst + (1 * dst_strd)), src_temp1); 11000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storel_epi64((__m128i *)(pu1_dst + (2 * dst_strd)), src_temp2); 11010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storel_epi64((__m128i *)(pu1_dst + (3 * dst_strd)), src_temp3); 11020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storel_epi64((__m128i *)(pu1_dst + (4 * dst_strd)), src_temp4); 11030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storel_epi64((__m128i *)(pu1_dst + (5 * dst_strd)), src_temp5); 11040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storel_epi64((__m128i *)(pu1_dst + (6 * dst_strd)), src_temp6); 11050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storel_epi64((__m128i *)(pu1_dst + (7 * dst_strd)), src_temp7); 11060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 11070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 11080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar else if(nt == 16) 11090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 11100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp4 = _mm_loadu_si128((__m128i *)(pu1_ref + two_nt + 1 + 8)); 11110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp4 = _mm_cvtepu8_epi16(src_temp4); 11120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 11130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp10 = _mm_sub_epi16(src_temp4, src_temp10); 11140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp10 = 
_mm_srai_epi16(src_temp10, 1); 11150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp10 = _mm_add_epi16(src_temp2, src_temp10); 11160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 11170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp3 = _mm_packus_epi16(src_temp3, src_temp10); 11180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst), src_temp3); 11190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 11200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /*pu1_dst[(row * dst_strd) + col] = pu1_ref[two_nt - 1 - row];*/ 11210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp1 = _mm_set1_epi8(pu1_ref[two_nt - 2]); 11220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2 = _mm_set1_epi8(pu1_ref[two_nt - 3]); 11230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp3 = _mm_set1_epi8(pu1_ref[two_nt - 4]); 11240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp4 = _mm_set1_epi8(pu1_ref[two_nt - 5]); 11250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp5 = _mm_set1_epi8(pu1_ref[two_nt - 6]); 11260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp6 = _mm_set1_epi8(pu1_ref[two_nt - 7]); 11270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp7 = _mm_set1_epi8(pu1_ref[two_nt - 8]); 11280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp10 = _mm_set1_epi8(pu1_ref[two_nt - 9]); 11290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 11300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + ((1) * dst_strd)), src_temp1); 11310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + ((2) * dst_strd)), src_temp2); 11320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + ((3) * dst_strd)), src_temp3); 
11330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + ((4) * dst_strd)), src_temp4); 11340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + ((5) * dst_strd)), src_temp5); 11350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + ((6) * dst_strd)), src_temp6); 11360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + ((7) * dst_strd)), src_temp7); 11370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + ((8) * dst_strd)), src_temp10); 11380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 11390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp1 = _mm_set1_epi8(pu1_ref[two_nt - 10]); 11400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2 = _mm_set1_epi8(pu1_ref[two_nt - 11]); 11410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp3 = _mm_set1_epi8(pu1_ref[two_nt - 12]); 11420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp4 = _mm_set1_epi8(pu1_ref[two_nt - 13]); 11430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp5 = _mm_set1_epi8(pu1_ref[two_nt - 14]); 11440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp6 = _mm_set1_epi8(pu1_ref[two_nt - 15]); 11450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp7 = _mm_set1_epi8(pu1_ref[two_nt - 16]); 11460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 11470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + ((9) * dst_strd)), src_temp1); 11480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + ((10) * dst_strd)), src_temp2); 11490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + ((11) * dst_strd)), src_temp3); 
11500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + ((12) * dst_strd)), src_temp4); 11510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + ((13) * dst_strd)), src_temp5); 11520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + ((14) * dst_strd)), src_temp6); 11530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + ((15) * dst_strd)), src_temp7); 11540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 11550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 11560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 11570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar} 11580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 11590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar/** 11600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar******************************************************************************* 11610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 11620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @brief 11630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* Intra prediction interpolation filter for vertical luma variable. 
*
* @par Description:
*    Vertical intra prediction with reference neighboring samples location
*    pointed by 'pu1_ref' to the TU block location pointed by 'pu1_dst'. Refer
*    to section 8.4.4.2.6 in the standard (Special case)
*
* @param[in] pu1_ref
*  UWORD8 pointer to the reference neighboring samples
*
* @param[out] pu1_dst
*  UWORD8 pointer to the destination
*
* @param[in] src_strd
*  integer source stride (unused)
*
* @param[in] dst_strd
*  integer destination stride
*
* @param[in] nt
*  integer Transform Block size
*
* @param[in] mode
*  integer intraprediction mode (unused)
11870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 11880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @returns 11890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 11900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @remarks 11910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* None 11920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 11930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar******************************************************************************* 11940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar*/ 11950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 11960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 11970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarvoid ihevc_intra_pred_luma_ver_sse42(UWORD8 *pu1_ref, 11980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 src_strd, 11990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar UWORD8 *pu1_dst, 12000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 dst_strd, 12010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 nt, 12020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 mode) 12030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar{ 12040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 row; 12050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD16 s2_predpixel; 12060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 two_nt = 2 * nt; 12070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i src_temp0, src_temp1, src_temp2, src_temp3, src_temp4, src_temp5, src_temp6, src_temp7; 12080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 12090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar UNUSED(src_strd); 12100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar UNUSED(mode); 12110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 
12120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar if(nt == 32) 12130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 12140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i temp1, temp2; 12150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 itr_count; 12160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 12170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp1 = _mm_loadu_si128((__m128i *)(pu1_ref + two_nt + 1)); 12180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp2 = _mm_loadu_si128((__m128i *)(pu1_ref + two_nt + 1 + 16)); 12190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 12200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar for(itr_count = 0; itr_count < 2; itr_count++) 12210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 12220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* pu1_dst[(row * dst_strd) + col] = dc_val;*/ 12230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + ((0) * dst_strd)), temp1); 12240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + ((1) * dst_strd)), temp1); 12250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + ((2) * dst_strd)), temp1); 12260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + ((3) * dst_strd)), temp1); 12270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + ((4) * dst_strd)), temp1); 12280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + ((5) * dst_strd)), temp1); 12290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + ((6) * dst_strd)), temp1); 12300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + ((7) * dst_strd)), 
temp1); 12310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 12320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + 16 + ((0) * dst_strd)), temp2); 12330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + 16 + ((1) * dst_strd)), temp2); 12340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + 16 + ((2) * dst_strd)), temp2); 12350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + 16 + ((3) * dst_strd)), temp2); 12360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + 16 + ((4) * dst_strd)), temp2); 12370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + 16 + ((5) * dst_strd)), temp2); 12380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + 16 + ((6) * dst_strd)), temp2); 12390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + 16 + ((7) * dst_strd)), temp2); 12400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 12410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + ((8) * dst_strd)), temp1); 12420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + ((9) * dst_strd)), temp1); 12430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + ((10) * dst_strd)), temp1); 12440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + ((11) * dst_strd)), temp1); 12450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + ((12) * dst_strd)), temp1); 12460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + ((13) * dst_strd)), temp1); 
12470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + ((14) * dst_strd)), temp1); 12480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + ((15) * dst_strd)), temp1); 12490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 12500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + 16 + ((8) * dst_strd)), temp2); 12510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + 16 + ((9) * dst_strd)), temp2); 12520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + 16 + ((10) * dst_strd)), temp2); 12530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + 16 + ((11) * dst_strd)), temp2); 12540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + 16 + ((12) * dst_strd)), temp2); 12550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + 16 + ((13) * dst_strd)), temp2); 12560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + 16 + ((14) * dst_strd)), temp2); 12570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + 16 + ((15) * dst_strd)), temp2); 12580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 12590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pu1_dst += 16 * dst_strd; 12600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 12610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 12620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 12630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar else 12640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 12650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 12660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish 
Mahendrakar /*Filtering done for the 1st column */ 12670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar for(row = nt - 1; row >= 0; row--) 12680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 12690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar s2_predpixel = pu1_ref[two_nt + 1] 12700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar + ((pu1_ref[two_nt - 1 - row] - pu1_ref[two_nt]) >> 1); 12710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pu1_dst[row * dst_strd] = CLIP_U8(s2_predpixel); 12720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 12730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 12740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* Replication to next columns*/ 12750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 12760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar if(nt == 4) 12770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 12780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar int temp1, temp2, temp3, temp4; 12790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 12800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2 = _mm_loadu_si128((__m128i *)(pu1_ref + two_nt + 1)); 12810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp3 = src_temp2; 12820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp4 = src_temp2; 12830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp5 = src_temp2; 12840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 12850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2 = _mm_insert_epi8(src_temp2, pu1_dst[(0 * dst_strd)], 0); 12860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp3 = _mm_insert_epi8(src_temp3, pu1_dst[(1 * dst_strd)], 0); 12870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp4 = _mm_insert_epi8(src_temp4, pu1_dst[(2 * dst_strd)], 0); 
12880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp5 = _mm_insert_epi8(src_temp5, pu1_dst[(3 * dst_strd)], 0); 12890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 12900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp1 = _mm_cvtsi128_si32(src_temp2); 12910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp2 = _mm_cvtsi128_si32(src_temp3); 12920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp3 = _mm_cvtsi128_si32(src_temp4); 12930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp4 = _mm_cvtsi128_si32(src_temp5); 12940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 12950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* loding 4-bit 8 pixels values */ 12960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar *(WORD32 *)(&pu1_dst[(0 * dst_strd)]) = temp1; 12970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar *(WORD32 *)(&pu1_dst[(1 * dst_strd)]) = temp2; 12980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar *(WORD32 *)(&pu1_dst[(2 * dst_strd)]) = temp3; 12990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar *(WORD32 *)(&pu1_dst[(3 * dst_strd)]) = temp4; 13000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 13010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 13020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar else if(nt == 8) 13030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 13040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 13050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp0 = _mm_loadu_si128((__m128i *)(pu1_ref + two_nt + 1)); 13060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp1 = src_temp0; 13070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2 = src_temp0; 13080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp3 = src_temp0; 13090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp4 = 
src_temp0; 13100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp5 = src_temp0; 13110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp6 = src_temp0; 13120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp7 = src_temp0; 13130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 13140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp0 = _mm_insert_epi8(src_temp0, pu1_dst[((0) * dst_strd)], 0); 13150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp1 = _mm_insert_epi8(src_temp1, pu1_dst[((1) * dst_strd)], 0); 13160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2 = _mm_insert_epi8(src_temp2, pu1_dst[((2) * dst_strd)], 0); 13170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp3 = _mm_insert_epi8(src_temp3, pu1_dst[((3) * dst_strd)], 0); 13180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp4 = _mm_insert_epi8(src_temp4, pu1_dst[((4) * dst_strd)], 0); 13190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp5 = _mm_insert_epi8(src_temp5, pu1_dst[((5) * dst_strd)], 0); 13200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp6 = _mm_insert_epi8(src_temp6, pu1_dst[((6) * dst_strd)], 0); 13210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp7 = _mm_insert_epi8(src_temp7, pu1_dst[((7) * dst_strd)], 0); 13220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 13230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storel_epi64((__m128i *)(pu1_dst + ((0) * dst_strd)), src_temp0); 13240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storel_epi64((__m128i *)(pu1_dst + ((1) * dst_strd)), src_temp1); 13250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storel_epi64((__m128i *)(pu1_dst + ((2) * dst_strd)), src_temp2); 13260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storel_epi64((__m128i *)(pu1_dst + ((3) * dst_strd)), src_temp3); 
13270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storel_epi64((__m128i *)(pu1_dst + ((4) * dst_strd)), src_temp4); 13280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storel_epi64((__m128i *)(pu1_dst + ((5) * dst_strd)), src_temp5); 13290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storel_epi64((__m128i *)(pu1_dst + ((6) * dst_strd)), src_temp6); 13300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storel_epi64((__m128i *)(pu1_dst + ((7) * dst_strd)), src_temp7); 13310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 13320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 13330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 13340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar else if(nt == 16) 13350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 13360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar for(row = 0; row < nt; row += 8) 13370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 13380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 13390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp0 = _mm_loadu_si128((__m128i *)(pu1_ref + two_nt + 1)); 13400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp1 = src_temp0; 13410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2 = src_temp0; 13420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp3 = src_temp0; 13430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp4 = src_temp0; 13440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp5 = src_temp0; 13450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp6 = src_temp0; 13460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp7 = src_temp0; 13470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 13480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp0 = _mm_insert_epi8(src_temp0, 
pu1_dst[((row + 0) * dst_strd)], 0); 13490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp1 = _mm_insert_epi8(src_temp1, pu1_dst[((row + 1) * dst_strd)], 0); 13500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2 = _mm_insert_epi8(src_temp2, pu1_dst[((row + 2) * dst_strd)], 0); 13510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp3 = _mm_insert_epi8(src_temp3, pu1_dst[((row + 3) * dst_strd)], 0); 13520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp4 = _mm_insert_epi8(src_temp4, pu1_dst[((row + 4) * dst_strd)], 0); 13530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp5 = _mm_insert_epi8(src_temp5, pu1_dst[((row + 5) * dst_strd)], 0); 13540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp6 = _mm_insert_epi8(src_temp6, pu1_dst[((row + 6) * dst_strd)], 0); 13550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp7 = _mm_insert_epi8(src_temp7, pu1_dst[((row + 7) * dst_strd)], 0); 13560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 13570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + ((row + 0) * dst_strd)), src_temp0); 13580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + ((row + 1) * dst_strd)), src_temp1); 13590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + ((row + 2) * dst_strd)), src_temp2); 13600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + ((row + 3) * dst_strd)), src_temp3); 13610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + ((row + 4) * dst_strd)), src_temp4); 13620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + ((row + 5) * dst_strd)), src_temp5); 13630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i 
*)(pu1_dst + ((row + 6) * dst_strd)), src_temp6); 13640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + ((row + 7) * dst_strd)), src_temp7); 13650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 13660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 13670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 13680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 13690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 13700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 13710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 13720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar} 13730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 13740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 13750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar/** 13760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar******************************************************************************* 13770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 13780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @brief 13790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* Intra prediction interpolation filter for luma mode 3 to mode 9 13800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 13810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @par Description: 13820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* Intraprediction for mode 3 to 9 (positive angle, horizontal mode ) with 13830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* reference neighboring samples location pointed by 'pu1_ref' to the TU 13840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* block location pointed by 'pu1_dst' 13850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 13860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] pu1_src 
13870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* UWORD8 pointer to the source 13880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 13890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[out] pu1_dst 13900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* UWORD8 pointer to the destination 13910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 13920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] src_strd 13930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* integer source stride 13940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 13950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] dst_strd 13960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* integer destination stride 13970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 13980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] nt 13990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* integer Transform Block size 14000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 14010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] mode 14020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* integer intraprediction mode 14030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 14040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @returns 14050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 14060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @remarks 14070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* None 14080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 14090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar******************************************************************************* 14100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar*/ 14110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 
14120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 14130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarvoid ihevc_intra_pred_luma_mode_3_to_9_sse42(UWORD8 *pu1_ref, 14140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 src_strd, 14150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar UWORD8 *pu1_dst, 14160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 dst_strd, 14170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 nt, 14180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 mode) 14190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar{ 14200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 row, col; 14210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 two_nt = 2 * nt; 14220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 intra_pred_ang; 14230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 14240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 14250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i const_temp_4x32b, const_temp2_4x32b, const_temp3_4x32b, const_temp4_4x32b; 14260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i fract_4x32b, intra_pred_ang_4x32b; 14270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i row_4x32b, two_nt_4x32b, ref_main_idx_4x32b, res_temp5_4x32b, sm3; 14280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar UNUSED(src_strd); 14290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 14300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 14310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* Intra Pred Angle according to the mode */ 14320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar intra_pred_ang = gai4_ihevc_ang_table[mode]; 14330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 14340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* For the angles other then 45 degree, 
interpolation btw 2 neighboring */ 14350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* samples dependent on distance to obtain destination sample */ 14360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 14370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* For the angles other then 45 degree, interpolation btw 2 neighboring */ 14380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* samples dependent on distance to obtain destination sample */ 14390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 14400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar const_temp_4x32b = _mm_set1_epi16(16); 14410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar const_temp2_4x32b = _mm_set1_epi32(31); 14420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar const_temp3_4x32b = _mm_set1_epi32(32); 14430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar const_temp4_4x32b = _mm_set1_epi32(4); 14440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 14450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar two_nt_4x32b = _mm_set1_epi32(two_nt - nt); 14460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 14470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 14480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar sm3 = _mm_load_si128((__m128i *)&IHEVCE_SHUFFLEMASKY11[0]); 14490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 14500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* intra_pred_ang = gai4_ihevc_ang_table[mode]; */ 14510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar intra_pred_ang_4x32b = _mm_set1_epi32(intra_pred_ang); 14520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 14530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar row_4x32b = _mm_set_epi32(4, 3, 2, 1); 14540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 14550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar if(nt == 4) 
14560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 14570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 14580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 ref_main_idx1, ref_main_idx2, ref_main_idx3, ref_main_idx4; 14590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar int temp11, temp21, temp31, temp41; 14600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar // WORD8 ai1_fract_temp_val[16], ai1_row_temp_val[16]; 14610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 14620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i fract1_8x16b, fract2_8x16b; 14630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i temp1_8x16b, temp2_8x16b, temp3_8x16b, temp4_8x16b; 14640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 14650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i src_temp1_8x16b, src_temp2_8x16b, src_temp3_8x16b, src_temp4_8x16b; 14660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i src_temp5_8x16b, src_temp6_8x16b, src_temp7_8x16b; //, src_temp8_8x16b; 14670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i ref_main_temp0, ref_main_temp1, ref_main_temp2; 14680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 14690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* pos = ((row + 1) * intra_pred_ang); */ 14700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar res_temp5_4x32b = _mm_mullo_epi32(row_4x32b, intra_pred_ang_4x32b); 14710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 14720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* idx = pos >> 5; */ 14730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar fract_4x32b = _mm_and_si128(res_temp5_4x32b, const_temp2_4x32b); 14740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 14750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* fract = pos & (31); */ 
14760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ref_main_idx_4x32b = _mm_sub_epi32(two_nt_4x32b, _mm_srai_epi32(res_temp5_4x32b, 5)); 14770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 14780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /*(32 - fract) */ 14790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar row_4x32b = _mm_sub_epi32(const_temp3_4x32b, fract_4x32b); 14800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 14810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar fract1_8x16b = _mm_slli_epi16(fract_4x32b, 8); 14820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar fract2_8x16b = _mm_slli_epi16(row_4x32b, 8); 14830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 14840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar fract_4x32b = _mm_or_si128(fract_4x32b, fract1_8x16b); 14850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar row_4x32b = _mm_or_si128(row_4x32b, fract2_8x16b); /*(32 - fract) */ 14860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 14870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar fract2_8x16b = _mm_unpackhi_epi8(row_4x32b, fract_4x32b); 14880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar fract1_8x16b = _mm_unpacklo_epi8(row_4x32b, fract_4x32b); 14890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 14900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp1_8x16b = _mm_shuffle_epi32(fract1_8x16b, 0x00); 14910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp2_8x16b = _mm_shuffle_epi32(fract1_8x16b, 0xaa); 14920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp3_8x16b = _mm_shuffle_epi32(fract2_8x16b, 0x00); 14930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp4_8x16b = _mm_shuffle_epi32(fract2_8x16b, 0xaa); 14940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 14950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ref_main_temp0 = 
_mm_srli_si128(ref_main_idx_4x32b, 4); /* next 32 bit values */ 14960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ref_main_temp1 = _mm_srli_si128(ref_main_idx_4x32b, 8); /* next 32 bit values */ 14970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ref_main_temp2 = _mm_srli_si128(ref_main_idx_4x32b, 12); /* next 32 bit values */ 14980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ref_main_idx1 = _mm_cvtsi128_si32(ref_main_idx_4x32b); /* col=0*/ 14990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ref_main_idx2 = _mm_cvtsi128_si32(ref_main_temp0); /* col=1*/ 15000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ref_main_idx3 = _mm_cvtsi128_si32(ref_main_temp1); /* col=2*/ 15010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ref_main_idx4 = _mm_cvtsi128_si32(ref_main_temp2); /* col=3*/ 15020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 15030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* loding 8-bit 16 pixels */ 15040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp1_8x16b = _mm_loadu_si128((__m128i *)(pu1_ref + ref_main_idx1 - 1)); /* col=0*/ 15050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2_8x16b = _mm_loadu_si128((__m128i *)(pu1_ref + ref_main_idx2 - 1)); /* col=1*/ 15060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp3_8x16b = _mm_loadu_si128((__m128i *)(pu1_ref + ref_main_idx3 - 1)); /* col=2*/ 15070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp4_8x16b = _mm_loadu_si128((__m128i *)(pu1_ref + ref_main_idx4 - 1)); /* col=3*/ 15080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 15090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp1_8x16b = _mm_shuffle_epi8(src_temp1_8x16b, sm3); /* col=0*/ 15100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2_8x16b = _mm_shuffle_epi8(src_temp2_8x16b, sm3); /* col=1*/ 15110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish 
Mahendrakar src_temp3_8x16b = _mm_shuffle_epi8(src_temp3_8x16b, sm3); /* col=2*/ 15120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp4_8x16b = _mm_shuffle_epi8(src_temp4_8x16b, sm3); /* col=3*/ 15130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 15140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* fract*(pu1_ref[ref_main_idx + 1]- pu1_ref[ref_main_idx]) */ 15150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp1_8x16b = _mm_maddubs_epi16(src_temp1_8x16b, temp1_8x16b); 15160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2_8x16b = _mm_maddubs_epi16(src_temp2_8x16b, temp2_8x16b); 15170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp3_8x16b = _mm_maddubs_epi16(src_temp3_8x16b, temp3_8x16b); 15180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp4_8x16b = _mm_maddubs_epi16(src_temp4_8x16b, temp4_8x16b); 15190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 15200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /*((32 - fract)* pu1_ref[ref_main_idx]+ fract * pu1_ref[ref_main_idx + 1] + 16)*/ 15210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp1_8x16b = _mm_add_epi16(src_temp1_8x16b, const_temp_4x32b); 15220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2_8x16b = _mm_add_epi16(src_temp2_8x16b, const_temp_4x32b); 15230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp3_8x16b = _mm_add_epi16(src_temp3_8x16b, const_temp_4x32b); 15240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp4_8x16b = _mm_add_epi16(src_temp4_8x16b, const_temp_4x32b); 15250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 15260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /*((32 - fract)* pu1_ref[ref_main_idx]+ fract * pu1_ref[ref_main_idx + 1] + 16) >>5*/ 15270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp1_8x16b = _mm_srai_epi16(src_temp1_8x16b, 5); /* col=0*/ 
15280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2_8x16b = _mm_srai_epi16(src_temp2_8x16b, 5); /* col=1*/ 15290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp3_8x16b = _mm_srai_epi16(src_temp3_8x16b, 5); /* col=2*/ 15300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp4_8x16b = _mm_srai_epi16(src_temp4_8x16b, 5); /* col=3*/ 15310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 15320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* converting 16 bit to 8 bit */ 15330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp1_8x16b = _mm_packus_epi16(src_temp1_8x16b, src_temp3_8x16b); /* col=0*/ 15340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2_8x16b = _mm_packus_epi16(src_temp2_8x16b, src_temp4_8x16b); /* col=1*/ 15350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 15360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 15370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp5_8x16b = _mm_unpacklo_epi8(src_temp1_8x16b, src_temp2_8x16b); 15380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp6_8x16b = _mm_unpackhi_epi8(src_temp1_8x16b, src_temp2_8x16b); 15390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 15400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp3_8x16b = _mm_unpacklo_epi16(src_temp5_8x16b, src_temp6_8x16b); 15410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2_8x16b = _mm_srli_si128(src_temp3_8x16b, 4); 15420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp1_8x16b = _mm_srli_si128(src_temp3_8x16b, 8); 15430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp7_8x16b = _mm_srli_si128(src_temp3_8x16b, 12); 15440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 15450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp11 = _mm_cvtsi128_si32(src_temp7_8x16b); 15460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish 
Mahendrakar temp21 = _mm_cvtsi128_si32(src_temp1_8x16b); 15470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp31 = _mm_cvtsi128_si32(src_temp2_8x16b); 15480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp41 = _mm_cvtsi128_si32(src_temp3_8x16b); 15490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 15500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* loding 4-bit 8 pixels values */ 15510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar *(WORD32 *)(&pu1_dst[(0 * dst_strd)]) = temp11; 15520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar *(WORD32 *)(&pu1_dst[(1 * dst_strd)]) = temp21; 15530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar *(WORD32 *)(&pu1_dst[(2 * dst_strd)]) = temp31; 15540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar *(WORD32 *)(&pu1_dst[(3 * dst_strd)]) = temp41; 15550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 15560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 15570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 15580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar else if(nt == 16 || nt == 32) 15590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 15600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar intra_pred_ang_4x32b = _mm_set1_epi16(intra_pred_ang); 15610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar row_4x32b = _mm_set_epi16(8, 7, 6, 5, 4, 3, 2, 1); 15620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar const_temp2_4x32b = _mm_set1_epi16(31); 15630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar const_temp4_4x32b = _mm_set1_epi16(8); 15640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar const_temp3_4x32b = _mm_set1_epi16(32); 15650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar two_nt_4x32b = _mm_set1_epi16(two_nt); 15660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 15670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 
for(col = 0; col < nt; col += 8) 15680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 15690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD16 pi2_ref_main_idx1, pi2_ref_main_idx2, pi2_ref_main_idx3, pi2_ref_main_idx4; 15700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD16 pi2_ref_main_idx5, pi2_ref_main_idx6, pi2_ref_main_idx7, pi2_ref_main_idx8; 15710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar //WORD8 ai1_fract_temp0_val[16], ai1_fract_temp1_val[16]; 15720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 15730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i fract1_8x16b, fract2_8x16b, fract3_8x16b, fract8_8x16b; 15740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 15750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i temp1_8x16b, temp2_8x16b, temp3_8x16b, temp4_8x16b; 15760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i temp11_8x16b, temp12_8x16b, temp13_8x16b, temp14_8x16b; 15770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 15780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* pos = ((row + 1) * intra_pred_ang); */ 15790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar res_temp5_4x32b = _mm_mullo_epi16(row_4x32b, intra_pred_ang_4x32b); 15800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 15810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* idx = pos >> 5; */ 15820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar fract_4x32b = _mm_and_si128(res_temp5_4x32b, const_temp2_4x32b); 15830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 15840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /*(32 - fract) */ 15850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar fract2_8x16b = _mm_sub_epi16(const_temp3_4x32b, fract_4x32b); 15860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 15870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar fract1_8x16b = 
_mm_slli_epi16(fract_4x32b, 8); 15880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar fract3_8x16b = _mm_slli_epi16(fract2_8x16b, 8); /*(32 - fract) */ 15890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 15900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar fract_4x32b = _mm_or_si128(fract_4x32b, fract1_8x16b); 15910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar fract2_8x16b = _mm_or_si128(fract2_8x16b, fract3_8x16b); /*(32 - fract) */ 15920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 15930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 15940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar fract8_8x16b = _mm_unpackhi_epi8(fract2_8x16b, fract_4x32b); 15950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar fract_4x32b = _mm_unpacklo_epi8(fract2_8x16b, fract_4x32b); 15960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 15970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp1_8x16b = _mm_shuffle_epi32(fract_4x32b, 0x00); 15980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp2_8x16b = _mm_shuffle_epi32(fract_4x32b, 0x55); 15990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp3_8x16b = _mm_shuffle_epi32(fract_4x32b, 0xaa); 16000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp4_8x16b = _mm_shuffle_epi32(fract_4x32b, 0xff); 16010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 16020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp11_8x16b = _mm_shuffle_epi32(fract8_8x16b, 0x00); 16030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp12_8x16b = _mm_shuffle_epi32(fract8_8x16b, 0x55); 16040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp13_8x16b = _mm_shuffle_epi32(fract8_8x16b, 0xaa); 16050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp14_8x16b = _mm_shuffle_epi32(fract8_8x16b, 0xff); 16060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 
16070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* fract = pos & (31); */ 16080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ref_main_idx_4x32b = _mm_sub_epi16(two_nt_4x32b, _mm_srai_epi16(res_temp5_4x32b, 5)); 16090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 16100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar row_4x32b = _mm_add_epi16(row_4x32b, const_temp4_4x32b); 16110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 16120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pi2_ref_main_idx1 = _mm_extract_epi16(ref_main_idx_4x32b, 0); /* col=0*/ 16130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pi2_ref_main_idx2 = _mm_extract_epi16(ref_main_idx_4x32b, 1); /* col=1*/ 16140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pi2_ref_main_idx3 = _mm_extract_epi16(ref_main_idx_4x32b, 2); /* col=2*/ 16150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pi2_ref_main_idx4 = _mm_extract_epi16(ref_main_idx_4x32b, 3); /* col=3*/ 16160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 16170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pi2_ref_main_idx5 = _mm_extract_epi16(ref_main_idx_4x32b, 4); /* col=5*/ 16180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pi2_ref_main_idx6 = _mm_extract_epi16(ref_main_idx_4x32b, 5); /* col=6*/ 16190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pi2_ref_main_idx7 = _mm_extract_epi16(ref_main_idx_4x32b, 6); /* col=7*/ 16200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pi2_ref_main_idx8 = _mm_extract_epi16(ref_main_idx_4x32b, 7); /* col=8*/ 16210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 16220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar for(row = 0; row < nt; row += 8) 16230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 16240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i src_temp1_8x16b, src_temp2_8x16b, src_temp3_8x16b, src_temp4_8x16b; 
16250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i src_temp5_8x16b, src_temp6_8x16b, src_temp7_8x16b, src_temp8_8x16b; 16260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 16270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 16280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i src_temp11_8x16b, src_temp12_8x16b, src_temp13_8x16b, src_temp14_8x16b; 16290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i src_temp15_8x16b, src_temp16_8x16b, src_temp17_8x16b, src_temp18_8x16b; 16300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 16310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* loding 8-bit 16 pixels */ 16320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp1_8x16b = _mm_loadu_si128((__m128i *)(pu1_ref + pi2_ref_main_idx1 - 1 - (8 + row))); /* col=0*/ 16330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2_8x16b = _mm_loadu_si128((__m128i *)(pu1_ref + pi2_ref_main_idx2 - 1 - (8 + row))); /* col=1*/ 16340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp3_8x16b = _mm_loadu_si128((__m128i *)(pu1_ref + pi2_ref_main_idx3 - 1 - (8 + row))); /* col=2*/ 16350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp4_8x16b = _mm_loadu_si128((__m128i *)(pu1_ref + pi2_ref_main_idx4 - 1 - (8 + row))); /* col=3*/ 16360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 16370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* loding 8-bit 16 pixels */ 16380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp11_8x16b = _mm_loadu_si128((__m128i *)(pu1_ref + pi2_ref_main_idx5 - 1 - (8 + row))); /* col=5*/ 16390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp12_8x16b = _mm_loadu_si128((__m128i *)(pu1_ref + pi2_ref_main_idx6 - 1 - (8 + row))); /* col=6*/ 16400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp13_8x16b = _mm_loadu_si128((__m128i *)(pu1_ref + 
pi2_ref_main_idx7 - 1 - (8 + row))); /* col=7*/ 16410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp14_8x16b = _mm_loadu_si128((__m128i *)(pu1_ref + pi2_ref_main_idx8 - 1 - (8 + row))); /* col=8*/ 16420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 16430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp1_8x16b = _mm_shuffle_epi8(src_temp1_8x16b, sm3); /* col=0*/ 16440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2_8x16b = _mm_shuffle_epi8(src_temp2_8x16b, sm3); /* col=1*/ 16450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp3_8x16b = _mm_shuffle_epi8(src_temp3_8x16b, sm3); /* col=2*/ 16460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp4_8x16b = _mm_shuffle_epi8(src_temp4_8x16b, sm3); /* col=3*/ 16470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 16480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp11_8x16b = _mm_shuffle_epi8(src_temp11_8x16b, sm3); /* col=0*/ 16490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp12_8x16b = _mm_shuffle_epi8(src_temp12_8x16b, sm3); /* col=1*/ 16500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp13_8x16b = _mm_shuffle_epi8(src_temp13_8x16b, sm3); /* col=2*/ 16510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp14_8x16b = _mm_shuffle_epi8(src_temp14_8x16b, sm3); /* col=3*/ 16520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 16530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* fract*(pu1_ref[ref_main_idx + 1]- pu1_ref[ref_main_idx]) */ 16540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp1_8x16b = _mm_maddubs_epi16(src_temp1_8x16b, temp1_8x16b); 16550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2_8x16b = _mm_maddubs_epi16(src_temp2_8x16b, temp2_8x16b); 16560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp3_8x16b = _mm_maddubs_epi16(src_temp3_8x16b, temp3_8x16b); 
16570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp4_8x16b = _mm_maddubs_epi16(src_temp4_8x16b, temp4_8x16b); 16580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 16590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* fract*(pu1_ref[ref_main_idx + 1]- pu1_ref[ref_main_idx]) */ 16600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp11_8x16b = _mm_maddubs_epi16(src_temp11_8x16b, temp11_8x16b); 16610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp12_8x16b = _mm_maddubs_epi16(src_temp12_8x16b, temp12_8x16b); 16620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp13_8x16b = _mm_maddubs_epi16(src_temp13_8x16b, temp13_8x16b); 16630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp14_8x16b = _mm_maddubs_epi16(src_temp14_8x16b, temp14_8x16b); 16640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 16650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /*((32 - fract)* pu1_ref[ref_main_idx]+ fract * pu1_ref[ref_main_idx + 1] + 16)*/ 16660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp1_8x16b = _mm_add_epi16(src_temp1_8x16b, const_temp_4x32b); 16670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2_8x16b = _mm_add_epi16(src_temp2_8x16b, const_temp_4x32b); 16680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp3_8x16b = _mm_add_epi16(src_temp3_8x16b, const_temp_4x32b); 16690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp4_8x16b = _mm_add_epi16(src_temp4_8x16b, const_temp_4x32b); 16700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 16710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /*((32 - fract)* pu1_ref[ref_main_idx]+ fract * pu1_ref[ref_main_idx + 1] + 16) >>5*/ 16720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp1_8x16b = _mm_srai_epi16(src_temp1_8x16b, 5); /* col=0*/ 16730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 
src_temp2_8x16b = _mm_srai_epi16(src_temp2_8x16b, 5); /* col=1*/ 16740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp3_8x16b = _mm_srai_epi16(src_temp3_8x16b, 5); /* col=2*/ 16750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp4_8x16b = _mm_srai_epi16(src_temp4_8x16b, 5); /* col=3*/ 16760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 16770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /*((32 - fract)* pu1_ref[ref_main_idx]+ fract * pu1_ref[ref_main_idx + 1] + 16)*/ 16780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp11_8x16b = _mm_add_epi16(src_temp11_8x16b, const_temp_4x32b); 16790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp12_8x16b = _mm_add_epi16(src_temp12_8x16b, const_temp_4x32b); 16800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp13_8x16b = _mm_add_epi16(src_temp13_8x16b, const_temp_4x32b); 16810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp14_8x16b = _mm_add_epi16(src_temp14_8x16b, const_temp_4x32b); 16820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 16830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /*((32 - fract)* pu1_ref[ref_main_idx]+ fract * pu1_ref[ref_main_idx + 1] + 16) >>5*/ 16840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp11_8x16b = _mm_srai_epi16(src_temp11_8x16b, 5); /* col=5*/ 16850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp12_8x16b = _mm_srai_epi16(src_temp12_8x16b, 5); /* col=6*/ 16860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp13_8x16b = _mm_srai_epi16(src_temp13_8x16b, 5); /* col=7*/ 16870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp14_8x16b = _mm_srai_epi16(src_temp14_8x16b, 5); /* col=8*/ 16880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 16890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* converting 16 bit to 8 bit */ 
16900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp1_8x16b = _mm_packus_epi16(src_temp1_8x16b, src_temp3_8x16b); /* col=0*/ 16910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2_8x16b = _mm_packus_epi16(src_temp2_8x16b, src_temp4_8x16b); /* col=1*/ 16920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 16930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* converting 16 bit to 8 bit */ 16940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp11_8x16b = _mm_packus_epi16(src_temp11_8x16b, src_temp13_8x16b); /* col=5*/ 16950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp12_8x16b = _mm_packus_epi16(src_temp12_8x16b, src_temp14_8x16b); /* col=6*/ 16960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 16970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp5_8x16b = _mm_unpacklo_epi8(src_temp1_8x16b, src_temp2_8x16b); 16980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp6_8x16b = _mm_unpackhi_epi8(src_temp1_8x16b, src_temp2_8x16b); 16990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 17000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp15_8x16b = _mm_unpacklo_epi8(src_temp11_8x16b, src_temp12_8x16b); 17010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp16_8x16b = _mm_unpackhi_epi8(src_temp11_8x16b, src_temp12_8x16b); 17020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 17030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp7_8x16b = _mm_unpacklo_epi16(src_temp5_8x16b, src_temp6_8x16b); 17040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp8_8x16b = _mm_unpackhi_epi16(src_temp5_8x16b, src_temp6_8x16b); 17050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 17060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp17_8x16b = _mm_unpacklo_epi16(src_temp15_8x16b, src_temp16_8x16b); 17070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish 
Mahendrakar src_temp18_8x16b = _mm_unpackhi_epi16(src_temp15_8x16b, src_temp16_8x16b); 17080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 17090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp1_8x16b = _mm_unpacklo_epi32(src_temp7_8x16b, src_temp17_8x16b); 17100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2_8x16b = _mm_unpackhi_epi32(src_temp7_8x16b, src_temp17_8x16b); 17110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 17120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp5_8x16b = _mm_srli_si128(src_temp1_8x16b, 8); 17130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp6_8x16b = _mm_srli_si128(src_temp2_8x16b, 8); 17140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 17150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp3_8x16b = _mm_unpacklo_epi32(src_temp8_8x16b, src_temp18_8x16b); 17160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp4_8x16b = _mm_unpackhi_epi32(src_temp8_8x16b, src_temp18_8x16b); 17170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 17180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp7_8x16b = _mm_srli_si128(src_temp3_8x16b, 8); 17190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp8_8x16b = _mm_srli_si128(src_temp4_8x16b, 8); 17200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 17210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storel_epi64((__m128i *)(pu1_dst + col + (dst_strd * (row + 7))), src_temp1_8x16b); /* row=7*/ 17220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 17230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storel_epi64((__m128i *)(pu1_dst + col + (dst_strd * (row + 6))), src_temp5_8x16b); /* row=6*/ 17240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 17250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storel_epi64((__m128i *)(pu1_dst + col + (dst_strd * (row + 5))), 
src_temp2_8x16b); /* row=5*/ 17260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 17270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storel_epi64((__m128i *)(pu1_dst + col + (dst_strd * (row + 4))), src_temp6_8x16b); /* row=4*/ 17280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 17290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storel_epi64((__m128i *)(pu1_dst + col + (dst_strd * (row + 3))), src_temp3_8x16b); /* row=3*/ 17300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 17310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storel_epi64((__m128i *)(pu1_dst + col + (dst_strd * (row + 2))), src_temp7_8x16b); /* row=2*/ 17320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 17330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storel_epi64((__m128i *)(pu1_dst + col + (dst_strd * (row + 1))), src_temp4_8x16b); /* row=1*/ 17340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 17350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storel_epi64((__m128i *)(pu1_dst + col + (dst_strd * (row + 0))), src_temp8_8x16b); /* row=0*/ 17360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 17370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 17380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 17390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 17400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar else 17410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 17420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar intra_pred_ang_4x32b = _mm_set1_epi16(intra_pred_ang); 17430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar row_4x32b = _mm_set_epi16(8, 7, 6, 5, 4, 3, 2, 1); 17440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar const_temp2_4x32b = _mm_set1_epi16(31); 17450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar const_temp4_4x32b = _mm_set1_epi16(8); 
17460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar const_temp3_4x32b = _mm_set1_epi16(32); 17470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar two_nt_4x32b = _mm_set1_epi16(two_nt - nt); 17480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 17490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD16 pi2_ref_main_idx1, pi2_ref_main_idx2, pi2_ref_main_idx3, pi2_ref_main_idx4; 17500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD16 pi2_ref_main_idx5, pi2_ref_main_idx6, pi2_ref_main_idx7, pi2_ref_main_idx8; 17510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 17520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i fract1_8x16b, fract2_8x16b, fract3_8x16b, fract8_8x16b; 17530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 17540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i temp1_8x16b, temp2_8x16b, temp3_8x16b, temp4_8x16b; 17550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i temp11_8x16b, temp12_8x16b, temp13_8x16b, temp14_8x16b; 17560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 17570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* pos = ((row + 1) * intra_pred_ang); */ 17580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar res_temp5_4x32b = _mm_mullo_epi16(row_4x32b, intra_pred_ang_4x32b); 17590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 17600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* idx = pos >> 5; */ 17610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar fract_4x32b = _mm_and_si128(res_temp5_4x32b, const_temp2_4x32b); 17620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 17630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* fract = pos & (31); */ 17640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ref_main_idx_4x32b = _mm_sub_epi16(two_nt_4x32b, _mm_srai_epi16(res_temp5_4x32b, 5)); 17650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish 
Mahendrakar 17660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /*(32 - fract) */ 17670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar fract2_8x16b = _mm_sub_epi16(const_temp3_4x32b, fract_4x32b); 17680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 17690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar fract1_8x16b = _mm_slli_epi16(fract_4x32b, 8); 17700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar fract3_8x16b = _mm_slli_epi16(fract2_8x16b, 8); /*(32 - fract) */ 17710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 17720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar fract_4x32b = _mm_or_si128(fract_4x32b, fract1_8x16b); 17730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar fract2_8x16b = _mm_or_si128(fract2_8x16b, fract3_8x16b); /*(32 - fract) */ 17740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 17750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 17760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar fract8_8x16b = _mm_unpackhi_epi8(fract2_8x16b, fract_4x32b); 17770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar fract_4x32b = _mm_unpacklo_epi8(fract2_8x16b, fract_4x32b); 17780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 17790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp1_8x16b = _mm_shuffle_epi32(fract_4x32b, 0x00); 17800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp2_8x16b = _mm_shuffle_epi32(fract_4x32b, 0x55); 17810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp3_8x16b = _mm_shuffle_epi32(fract_4x32b, 0xaa); 17820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp4_8x16b = _mm_shuffle_epi32(fract_4x32b, 0xff); 17830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 17840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp11_8x16b = _mm_shuffle_epi32(fract8_8x16b, 0x00); 17850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp12_8x16b = 
_mm_shuffle_epi32(fract8_8x16b, 0x55); 17860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp13_8x16b = _mm_shuffle_epi32(fract8_8x16b, 0xaa); 17870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp14_8x16b = _mm_shuffle_epi32(fract8_8x16b, 0xff); 17880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 17890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pi2_ref_main_idx1 = _mm_extract_epi16(ref_main_idx_4x32b, 0); /* col=0*/ 17900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pi2_ref_main_idx2 = _mm_extract_epi16(ref_main_idx_4x32b, 1); /* col=1*/ 17910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pi2_ref_main_idx3 = _mm_extract_epi16(ref_main_idx_4x32b, 2); /* col=2*/ 17920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pi2_ref_main_idx4 = _mm_extract_epi16(ref_main_idx_4x32b, 3); /* col=3*/ 17930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 17940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pi2_ref_main_idx5 = _mm_extract_epi16(ref_main_idx_4x32b, 4); /* col=5*/ 17950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pi2_ref_main_idx6 = _mm_extract_epi16(ref_main_idx_4x32b, 5); /* col=6*/ 17960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pi2_ref_main_idx7 = _mm_extract_epi16(ref_main_idx_4x32b, 6); /* col=7*/ 17970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pi2_ref_main_idx8 = _mm_extract_epi16(ref_main_idx_4x32b, 7); /* col=8*/ 17980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 17990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 18000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i src_temp1_8x16b, src_temp2_8x16b, src_temp3_8x16b, src_temp4_8x16b; 18010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i src_temp5_8x16b, src_temp6_8x16b, src_temp7_8x16b, src_temp8_8x16b; 18020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 
18030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i src_temp11_8x16b, src_temp12_8x16b, src_temp13_8x16b, src_temp14_8x16b; 18040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i src_temp15_8x16b, src_temp16_8x16b, src_temp17_8x16b, src_temp18_8x16b; 18050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 18060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* loding 8-bit 16 pixels */ 18070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp1_8x16b = _mm_loadu_si128((__m128i *)(pu1_ref + pi2_ref_main_idx1 - 1)); /* col=0*/ 18080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2_8x16b = _mm_loadu_si128((__m128i *)(pu1_ref + pi2_ref_main_idx2 - 1)); /* col=1*/ 18090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp3_8x16b = _mm_loadu_si128((__m128i *)(pu1_ref + pi2_ref_main_idx3 - 1)); /* col=2*/ 18100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp4_8x16b = _mm_loadu_si128((__m128i *)(pu1_ref + pi2_ref_main_idx4 - 1)); /* col=3*/ 18110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 18120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* loding 8-bit 16 pixels */ 18130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp11_8x16b = _mm_loadu_si128((__m128i *)(pu1_ref + pi2_ref_main_idx5 - 1)); /* col=5*/ 18140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp12_8x16b = _mm_loadu_si128((__m128i *)(pu1_ref + pi2_ref_main_idx6 - 1)); /* col=6*/ 18150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp13_8x16b = _mm_loadu_si128((__m128i *)(pu1_ref + pi2_ref_main_idx7 - 1)); /* col=7*/ 18160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp14_8x16b = _mm_loadu_si128((__m128i *)(pu1_ref + pi2_ref_main_idx8 - 1)); /* col=8*/ 18170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 18180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp1_8x16b = 
_mm_shuffle_epi8(src_temp1_8x16b, sm3); /* col=0*/ 18190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2_8x16b = _mm_shuffle_epi8(src_temp2_8x16b, sm3); /* col=1*/ 18200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp3_8x16b = _mm_shuffle_epi8(src_temp3_8x16b, sm3); /* col=2*/ 18210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp4_8x16b = _mm_shuffle_epi8(src_temp4_8x16b, sm3); /* col=3*/ 18220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 18230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp11_8x16b = _mm_shuffle_epi8(src_temp11_8x16b, sm3); /* col=0*/ 18240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp12_8x16b = _mm_shuffle_epi8(src_temp12_8x16b, sm3); /* col=1*/ 18250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp13_8x16b = _mm_shuffle_epi8(src_temp13_8x16b, sm3); /* col=2*/ 18260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp14_8x16b = _mm_shuffle_epi8(src_temp14_8x16b, sm3); /* col=3*/ 18270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 18280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* fract*(pu1_ref[ref_main_idx + 1]- pu1_ref[ref_main_idx]) */ 18290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp1_8x16b = _mm_maddubs_epi16(src_temp1_8x16b, temp1_8x16b); 18300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2_8x16b = _mm_maddubs_epi16(src_temp2_8x16b, temp2_8x16b); 18310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp3_8x16b = _mm_maddubs_epi16(src_temp3_8x16b, temp3_8x16b); 18320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp4_8x16b = _mm_maddubs_epi16(src_temp4_8x16b, temp4_8x16b); 18330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 18340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* fract*(pu1_ref[ref_main_idx + 1]- pu1_ref[ref_main_idx]) */ 18350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish 
Mahendrakar src_temp11_8x16b = _mm_maddubs_epi16(src_temp11_8x16b, temp11_8x16b); 18360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp12_8x16b = _mm_maddubs_epi16(src_temp12_8x16b, temp12_8x16b); 18370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp13_8x16b = _mm_maddubs_epi16(src_temp13_8x16b, temp13_8x16b); 18380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp14_8x16b = _mm_maddubs_epi16(src_temp14_8x16b, temp14_8x16b); 18390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 18400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /*((32 - fract)* pu1_ref[ref_main_idx]+ fract * pu1_ref[ref_main_idx + 1] + 16)*/ 18410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp1_8x16b = _mm_add_epi16(src_temp1_8x16b, const_temp_4x32b); 18420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2_8x16b = _mm_add_epi16(src_temp2_8x16b, const_temp_4x32b); 18430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp3_8x16b = _mm_add_epi16(src_temp3_8x16b, const_temp_4x32b); 18440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp4_8x16b = _mm_add_epi16(src_temp4_8x16b, const_temp_4x32b); 18450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 18460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /*((32 - fract)* pu1_ref[ref_main_idx]+ fract * pu1_ref[ref_main_idx + 1] + 16) >>5*/ 18470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp1_8x16b = _mm_srai_epi16(src_temp1_8x16b, 5); /* col=0*/ 18480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2_8x16b = _mm_srai_epi16(src_temp2_8x16b, 5); /* col=1*/ 18490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp3_8x16b = _mm_srai_epi16(src_temp3_8x16b, 5); /* col=2*/ 18500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp4_8x16b = _mm_srai_epi16(src_temp4_8x16b, 5); /* col=3*/ 18510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 
18520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /*((32 - fract)* pu1_ref[ref_main_idx]+ fract * pu1_ref[ref_main_idx + 1] + 16)*/ 18530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp11_8x16b = _mm_add_epi16(src_temp11_8x16b, const_temp_4x32b); 18540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp12_8x16b = _mm_add_epi16(src_temp12_8x16b, const_temp_4x32b); 18550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp13_8x16b = _mm_add_epi16(src_temp13_8x16b, const_temp_4x32b); 18560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp14_8x16b = _mm_add_epi16(src_temp14_8x16b, const_temp_4x32b); 18570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 18580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /*((32 - fract)* pu1_ref[ref_main_idx]+ fract * pu1_ref[ref_main_idx + 1] + 16) >>5*/ 18590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp11_8x16b = _mm_srai_epi16(src_temp11_8x16b, 5); /* col=5*/ 18600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp12_8x16b = _mm_srai_epi16(src_temp12_8x16b, 5); /* col=6*/ 18610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp13_8x16b = _mm_srai_epi16(src_temp13_8x16b, 5); /* col=7*/ 18620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp14_8x16b = _mm_srai_epi16(src_temp14_8x16b, 5); /* col=8*/ 18630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 18640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* converting 16 bit to 8 bit */ 18650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp1_8x16b = _mm_packus_epi16(src_temp1_8x16b, src_temp3_8x16b); /* col=0*/ 18660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2_8x16b = _mm_packus_epi16(src_temp2_8x16b, src_temp4_8x16b); /* col=1*/ 18670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 18680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* converting 16 
bit to 8 bit */ 18690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp11_8x16b = _mm_packus_epi16(src_temp11_8x16b, src_temp13_8x16b); /* col=5*/ 18700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp12_8x16b = _mm_packus_epi16(src_temp12_8x16b, src_temp14_8x16b); /* col=6*/ 18710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 18720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp5_8x16b = _mm_unpacklo_epi8(src_temp1_8x16b, src_temp2_8x16b); 18730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp6_8x16b = _mm_unpackhi_epi8(src_temp1_8x16b, src_temp2_8x16b); 18740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 18750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp15_8x16b = _mm_unpacklo_epi8(src_temp11_8x16b, src_temp12_8x16b); 18760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp16_8x16b = _mm_unpackhi_epi8(src_temp11_8x16b, src_temp12_8x16b); 18770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 18780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp7_8x16b = _mm_unpacklo_epi16(src_temp5_8x16b, src_temp6_8x16b); 18790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp8_8x16b = _mm_unpackhi_epi16(src_temp5_8x16b, src_temp6_8x16b); 18800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 18810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp17_8x16b = _mm_unpacklo_epi16(src_temp15_8x16b, src_temp16_8x16b); 18820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp18_8x16b = _mm_unpackhi_epi16(src_temp15_8x16b, src_temp16_8x16b); 18830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 18840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp1_8x16b = _mm_unpacklo_epi32(src_temp7_8x16b, src_temp17_8x16b); 18850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2_8x16b = _mm_unpackhi_epi32(src_temp7_8x16b, src_temp17_8x16b); 
18860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 18870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp5_8x16b = _mm_srli_si128(src_temp1_8x16b, 8); 18880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp6_8x16b = _mm_srli_si128(src_temp2_8x16b, 8); 18890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 18900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp3_8x16b = _mm_unpacklo_epi32(src_temp8_8x16b, src_temp18_8x16b); 18910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp4_8x16b = _mm_unpackhi_epi32(src_temp8_8x16b, src_temp18_8x16b); 18920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 18930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp7_8x16b = _mm_srli_si128(src_temp3_8x16b, 8); 18940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp8_8x16b = _mm_srli_si128(src_temp4_8x16b, 8); 18950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 18960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storel_epi64((__m128i *)(pu1_dst), src_temp8_8x16b); /* row=0*/ 18970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storel_epi64((__m128i *)(pu1_dst + (dst_strd * 1)), src_temp4_8x16b); /* row=1*/ 18980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storel_epi64((__m128i *)(pu1_dst + (dst_strd * 2)), src_temp7_8x16b); /* row=2*/ 18990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storel_epi64((__m128i *)(pu1_dst + (dst_strd * 3)), src_temp3_8x16b); /* row=3*/ 19000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storel_epi64((__m128i *)(pu1_dst + (dst_strd * 4)), src_temp6_8x16b); /* row=4*/ 19010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storel_epi64((__m128i *)(pu1_dst + (dst_strd * 5)), src_temp2_8x16b); /* row=5*/ 19020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 19030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 
_mm_storel_epi64((__m128i *)(pu1_dst + (dst_strd * 6)), src_temp5_8x16b); /* row=6*/ 19040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storel_epi64((__m128i *)(pu1_dst + (dst_strd * 7)), src_temp1_8x16b); /* row=7*/ 19050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 19060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 19070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 19080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 19090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 19100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar} 19110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 19120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar/** 19130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar******************************************************************************* 19140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 19150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @brief 19160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* Intra prediction interpolation filter for luma mode 11 to mode 17 19170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 19180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @par Description: 19190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* Intra prediction for modes 11 to 17 (negative angle, horizontal mode) 19200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* with reference neighboring samples location pointed by 'pu1_ref' to the 19210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* TU block location pointed by 'pu1_dst' 19220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 19230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] pu1_ref 19240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* UWORD8 pointer to the reference neighboring samples 19250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 
19260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[out] pu1_dst 19270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* UWORD8 pointer to the destination 19280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 19290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] src_strd 19300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* integer source stride 19310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 19320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] dst_strd 19330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* integer destination stride 19340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 19350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] nt 19360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* integer Transform Block size 19370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 19380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] mode 19390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* integer intraprediction mode 19400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 19410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @returns 19420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 19430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @remarks 19440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* None 19450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 19460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar******************************************************************************* 19470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar*/ 19480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 19490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 19500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarvoid ihevc_intra_pred_luma_mode_11_to_17_sse42(UWORD8 *pu1_ref, 
19510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 src_strd, 19520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar UWORD8 *pu1_dst, 19530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 dst_strd, 19540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 nt, 19550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 mode) 19560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar{ 19570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 19580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* This function and ihevc_intra_pred_luma_mode_19_to_25 are same except*/ 19590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* for ref main & side samples assignment,can be combined for */ 19600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* optimzation*/ 19610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 19620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 row, col, k; 19630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 two_nt; 19640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 intra_pred_ang, inv_ang, inv_ang_sum; 19650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 ref_idx; 19660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 19670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i const_temp_4x32b, const_temp2_4x32b, const_temp3_4x32b, const_temp4_4x32b; 19680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i fract_4x32b, intra_pred_ang_4x32b; 19690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i row_4x32b, two_nt_4x32b, ref_main_idx_4x32b, res_temp5_4x32b, sm3; 19700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 19710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 19720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar UWORD8 ref_tmp[2 * MAX_CU_SIZE + 2]; 
19730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar UWORD8 *ref_main; 19740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar UWORD8 *ref_temp; 19750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar UNUSED(src_strd); 19760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 19770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar inv_ang_sum = 128; 19780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar two_nt = 2 * nt; 19790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ref_temp = ref_tmp + 1; 19800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ref_main = ref_temp + nt - 1; 19810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar intra_pred_ang = gai4_ihevc_ang_table[mode]; 19820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 19830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* For the angles other then 45 degree, interpolation btw 2 neighboring */ 19840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* samples dependent on distance to obtain destination sample */ 19850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar const_temp_4x32b = _mm_set1_epi16(16); 19860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar const_temp2_4x32b = _mm_set1_epi32(31); 19870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar const_temp3_4x32b = _mm_set1_epi32(32); 19880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar const_temp4_4x32b = _mm_set1_epi32(4); 19890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 19900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar two_nt_4x32b = _mm_set1_epi32(1); 19910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 19920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 19930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar sm3 = _mm_load_si128((__m128i *)&IHEVCE_SHUFFLEMASKY11[0]); 19940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 
19950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* intra_pred_ang = gai4_ihevc_ang_table[mode]; */ 19960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar intra_pred_ang_4x32b = _mm_set1_epi32(intra_pred_ang); 19970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 19980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar row_4x32b = _mm_set_epi32(4, 3, 2, 1); 19990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 20000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar if(nt == 4) 20010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 20020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 20030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 ref_main_idx1, ref_main_idx2, ref_main_idx3, ref_main_idx4; 20040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar int temp11, temp21, temp31, temp41; 20050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar// WORD8 ai1_fract_temp_val[16], ai1_row_temp_val[16]; 20060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 20070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i fract1_8x16b, fract2_8x16b; 20080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i temp1_8x16b, temp2_8x16b, temp3_8x16b, temp4_8x16b; 20090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 20100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i src_temp1_8x16b, src_temp2_8x16b, src_temp3_8x16b, src_temp4_8x16b; 20110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i src_temp5_8x16b, src_temp6_8x16b, src_temp7_8x16b, src_temp8_8x16b; 20120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i ref_main_temp0, ref_main_temp1, ref_main_temp2; 20130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 20140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* Intermediate reference samples for negative angle modes */ 20150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish 
Mahendrakar /* This have to be removed during optimization*/ 20160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* For horizontal modes, (ref main = ref left) (ref side = ref above) */ 20170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar inv_ang = gai4_ihevc_inv_ang_table[mode - 11]; 20180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 20190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ref_main = ref_temp + nt - 1; 20200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar for(k = 0; k < nt + 1; k++) 20210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ref_temp[k + nt - 1] = pu1_ref[two_nt - k]; 20220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 20230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ref_main = ref_temp + nt - 1; 20240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ref_idx = (nt * intra_pred_ang) >> 5; 20250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 20260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* SIMD Optimization can be done using look-up table for the loop */ 20270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* For negative angled derive the main reference samples from side */ 20280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* reference samples refer to section 8.4.4.2.6 */ 20290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar for(k = -1; k > ref_idx; k--) 20300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 20310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar inv_ang_sum += inv_ang; 20320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ref_main[k] = pu1_ref[two_nt + (inv_ang_sum >> 8)]; 20330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 20340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 20350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 20360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* pos = ((row + 1) * 
intra_pred_ang); */ 20370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar res_temp5_4x32b = _mm_mullo_epi32(row_4x32b, intra_pred_ang_4x32b); 20380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 20390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* idx = pos >> 5; */ 20400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar fract_4x32b = _mm_and_si128(res_temp5_4x32b, const_temp2_4x32b); 20410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 20420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* fract = pos & (31); */ 20430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ref_main_idx_4x32b = _mm_add_epi32(two_nt_4x32b, _mm_srai_epi32(res_temp5_4x32b, 5)); 20440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 20450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /*(32 - fract) */ 20460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar row_4x32b = _mm_sub_epi32(const_temp3_4x32b, fract_4x32b); 20470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 20480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar fract1_8x16b = _mm_slli_epi16(fract_4x32b, 8); 20490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar fract2_8x16b = _mm_slli_epi16(row_4x32b, 8); 20500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 20510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar fract_4x32b = _mm_or_si128(fract_4x32b, fract1_8x16b); 20520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar row_4x32b = _mm_or_si128(row_4x32b, fract2_8x16b); /*(32 - fract) */ 20530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 20540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar fract2_8x16b = _mm_unpackhi_epi8(fract_4x32b, row_4x32b); 20550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar fract1_8x16b = _mm_unpacklo_epi8(fract_4x32b, row_4x32b); 20560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 
20570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp1_8x16b = _mm_shuffle_epi32(fract1_8x16b, 0x00); 20580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp2_8x16b = _mm_shuffle_epi32(fract1_8x16b, 0xaa); 20590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp3_8x16b = _mm_shuffle_epi32(fract2_8x16b, 0x00); 20600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp4_8x16b = _mm_shuffle_epi32(fract2_8x16b, 0xaa); 20610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 20620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ref_main_temp0 = _mm_srli_si128(ref_main_idx_4x32b, 4); /* next 32 bit values */ 20630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ref_main_temp1 = _mm_srli_si128(ref_main_idx_4x32b, 8); /* next 32 bit values */ 20640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ref_main_temp2 = _mm_srli_si128(ref_main_idx_4x32b, 12); /* next 32 bit values */ 20650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ref_main_idx1 = _mm_cvtsi128_si32(ref_main_idx_4x32b); /* col=0*/ 20660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ref_main_idx2 = _mm_cvtsi128_si32(ref_main_temp0); /* col=1*/ 20670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ref_main_idx3 = _mm_cvtsi128_si32(ref_main_temp1); /* col=2*/ 20680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ref_main_idx4 = _mm_cvtsi128_si32(ref_main_temp2); /* col=3*/ 20690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 20700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* loding 8-bit 16 pixels */ 20710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp5_8x16b = _mm_loadu_si128((__m128i *)(ref_main + ref_main_idx1)); /* col=0*/ 20720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp6_8x16b = _mm_loadu_si128((__m128i *)(ref_main + ref_main_idx2)); /* col=1*/ 20730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp7_8x16b = 
_mm_loadu_si128((__m128i *)(ref_main + ref_main_idx3)); /* col=2*/ 20740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp8_8x16b = _mm_loadu_si128((__m128i *)(ref_main + ref_main_idx4)); /* col=3*/ 20750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 20760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp1_8x16b = _mm_shuffle_epi8(src_temp5_8x16b, sm3); /* col=0*/ 20770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2_8x16b = _mm_shuffle_epi8(src_temp6_8x16b, sm3); /* col=1*/ 20780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp3_8x16b = _mm_shuffle_epi8(src_temp7_8x16b, sm3); /* col=2*/ 20790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp4_8x16b = _mm_shuffle_epi8(src_temp8_8x16b, sm3); /* col=3*/ 20800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 20810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* fract*(pu1_ref[ref_main_idx + 1]- pu1_ref[ref_main_idx]) */ 20820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp1_8x16b = _mm_maddubs_epi16(src_temp1_8x16b, temp1_8x16b); 20830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2_8x16b = _mm_maddubs_epi16(src_temp2_8x16b, temp2_8x16b); 20840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp3_8x16b = _mm_maddubs_epi16(src_temp3_8x16b, temp3_8x16b); 20850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp4_8x16b = _mm_maddubs_epi16(src_temp4_8x16b, temp4_8x16b); 20860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 20870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /*((32 - fract)* pu1_ref[ref_main_idx]+ fract * pu1_ref[ref_main_idx + 1] + 16)*/ 20880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp1_8x16b = _mm_add_epi16(src_temp1_8x16b, const_temp_4x32b); 20890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2_8x16b = _mm_add_epi16(src_temp2_8x16b, const_temp_4x32b); 
20900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp3_8x16b = _mm_add_epi16(src_temp3_8x16b, const_temp_4x32b); 20910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp4_8x16b = _mm_add_epi16(src_temp4_8x16b, const_temp_4x32b); 20920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 20930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /*((32 - fract)* pu1_ref[ref_main_idx]+ fract * pu1_ref[ref_main_idx + 1] + 16) >>5*/ 20940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp1_8x16b = _mm_srai_epi16(src_temp1_8x16b, 5); /* col=0*/ 20950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2_8x16b = _mm_srai_epi16(src_temp2_8x16b, 5); /* col=1*/ 20960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp3_8x16b = _mm_srai_epi16(src_temp3_8x16b, 5); /* col=2*/ 20970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp4_8x16b = _mm_srai_epi16(src_temp4_8x16b, 5); /* col=3*/ 20980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 20990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* converting 16 bit to 8 bit */ 21000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp1_8x16b = _mm_packus_epi16(src_temp1_8x16b, src_temp3_8x16b); /* col=0*/ 21010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2_8x16b = _mm_packus_epi16(src_temp2_8x16b, src_temp4_8x16b); /* col=1*/ 21020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 21030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 21040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp5_8x16b = _mm_unpacklo_epi8(src_temp1_8x16b, src_temp2_8x16b); 21050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp6_8x16b = _mm_unpackhi_epi8(src_temp1_8x16b, src_temp2_8x16b); 21060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 21070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp7_8x16b = 
_mm_unpacklo_epi16(src_temp5_8x16b, src_temp6_8x16b); 21080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp1_8x16b = _mm_srli_si128(src_temp7_8x16b, 4); 21090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2_8x16b = _mm_srli_si128(src_temp7_8x16b, 8); 21100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp3_8x16b = _mm_srli_si128(src_temp7_8x16b, 12); 21110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 21120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp11 = _mm_cvtsi128_si32(src_temp7_8x16b); 21130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp21 = _mm_cvtsi128_si32(src_temp1_8x16b); 21140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp31 = _mm_cvtsi128_si32(src_temp2_8x16b); 21150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp41 = _mm_cvtsi128_si32(src_temp3_8x16b); 21160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 21170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* loding 8-bit 4 pixels values */ 21180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar *(WORD32 *)(&pu1_dst[(0 * dst_strd)]) = temp11; 21190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar *(WORD32 *)(&pu1_dst[(1 * dst_strd)]) = temp21; 21200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar *(WORD32 *)(&pu1_dst[(2 * dst_strd)]) = temp31; 21210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar *(WORD32 *)(&pu1_dst[(3 * dst_strd)]) = temp41; 21220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 21230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 21240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar else if(nt == 32) 21250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 21260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 21270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 21280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i temp1, temp2, temp3, 
temp11, temp12; 21290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i src_values0, src_values1; 21300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* Intermediate reference samples for negative angle modes */ 21310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 21320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ref_temp[two_nt - 1] = pu1_ref[two_nt - nt]; 21330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp1 = _mm_loadu_si128((__m128i *)(pu1_ref + nt + 1)); 21340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp3 = _mm_loadu_si128((__m128i *)(pu1_ref + nt + 17)); 21350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp2 = _mm_loadu_si128((__m128i *)IHEVCE_SHUFFLEMASKY3); 21360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 21370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* For negative angled derive the main reference samples from side */ 21380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 21390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values0 = _mm_loadu_si128((__m128i *)(pu1_ref + two_nt + 1)); /*nt-(nt+15)*/ 21400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values1 = _mm_loadu_si128((__m128i *)(pu1_ref + two_nt + 17)); /*(nt+16)-(two_nt-1)*/ 21410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 21420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp11 = _mm_loadu_si128((__m128i *)(inv_angle_shuffle[17 - mode])); 21430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp12 = _mm_loadu_si128((__m128i *)(inv_angle_shuffle[17 - mode] + 16)); 21440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 21450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values0 = _mm_shuffle_epi8(src_values0, temp2); 21460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values1 = _mm_shuffle_epi8(src_values1, temp2); 
21470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values0 = _mm_shuffle_epi8(src_values0, temp12); 21480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values1 = _mm_shuffle_epi8(src_values1, temp11); 21490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 21500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp1 = _mm_shuffle_epi8(temp1, temp2); 21510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp3 = _mm_shuffle_epi8(temp3, temp2); 21520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 21530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(ref_temp + nt - 1), temp3); 21540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(ref_temp + nt - 1 + 16), temp1); 21550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(ref_main - 16), src_values0); 21560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(ref_main - nt + inv_angle_shuffle[17 - mode][0]), src_values1); 21570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 21580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 21590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar intra_pred_ang_4x32b = _mm_set1_epi16(intra_pred_ang); 21600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar row_4x32b = _mm_set_epi16(8, 7, 6, 5, 4, 3, 2, 1); 21610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar const_temp2_4x32b = _mm_set1_epi16(31); 21620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar const_temp4_4x32b = _mm_set1_epi16(8); 21630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar const_temp3_4x32b = _mm_set1_epi16(32); 21640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar two_nt_4x32b = _mm_set1_epi16(1); 21650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 21660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar for(col = 0; col < nt; 
col += 8) 21670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 21680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD16 pi2_ref_main_idx1, pi2_ref_main_idx2, pi2_ref_main_idx3, pi2_ref_main_idx4; 21690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD16 pi2_ref_main_idx5, pi2_ref_main_idx6, pi2_ref_main_idx7, pi2_ref_main_idx8; 21700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar // WORD8 ai1_fract_temp0_val[16], ai1_fract_temp1_val[16]; 21710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 21720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i fract1_8x16b, fract2_8x16b, fract3_8x16b, fract8_8x16b; 21730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 21740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i temp1_8x16b, temp2_8x16b, temp3_8x16b, temp4_8x16b; 21750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i temp11_8x16b, temp12_8x16b, temp13_8x16b, temp14_8x16b; 21760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 21770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* pos = ((row + 1) * intra_pred_ang); */ 21780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar res_temp5_4x32b = _mm_mullo_epi16(row_4x32b, intra_pred_ang_4x32b); 21790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 21800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* idx = pos >> 5; */ 21810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar fract_4x32b = _mm_and_si128(res_temp5_4x32b, const_temp2_4x32b); 21820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 21830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* fract = pos & (31); */ 21840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ref_main_idx_4x32b = _mm_add_epi16(two_nt_4x32b, _mm_srai_epi16(res_temp5_4x32b, 5)); 21850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 21860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 
row_4x32b = _mm_add_epi16(row_4x32b, const_temp4_4x32b); 21870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /*(32 - fract) */ 21880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar fract2_8x16b = _mm_sub_epi16(const_temp3_4x32b, fract_4x32b); 21890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 21900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar fract1_8x16b = _mm_slli_epi16(fract_4x32b, 8); 21910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar fract3_8x16b = _mm_slli_epi16(fract2_8x16b, 8); /*(32 - fract) */ 21920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 21930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar fract_4x32b = _mm_or_si128(fract_4x32b, fract1_8x16b); 21940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar fract2_8x16b = _mm_or_si128(fract2_8x16b, fract3_8x16b); /*(32 - fract) */ 21950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 21960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 21970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar fract8_8x16b = _mm_unpackhi_epi8(fract_4x32b, fract2_8x16b); 21980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar fract_4x32b = _mm_unpacklo_epi8(fract_4x32b, fract2_8x16b); 21990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 22000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp1_8x16b = _mm_shuffle_epi32(fract_4x32b, 0x00); 22010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp2_8x16b = _mm_shuffle_epi32(fract_4x32b, 0x55); 22020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp3_8x16b = _mm_shuffle_epi32(fract_4x32b, 0xaa); 22030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp4_8x16b = _mm_shuffle_epi32(fract_4x32b, 0xff); 22040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 22050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp11_8x16b = _mm_shuffle_epi32(fract8_8x16b, 0x00); 
22060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp12_8x16b = _mm_shuffle_epi32(fract8_8x16b, 0x55); 22070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp13_8x16b = _mm_shuffle_epi32(fract8_8x16b, 0xaa); 22080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp14_8x16b = _mm_shuffle_epi32(fract8_8x16b, 0xff); 22090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 22100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pi2_ref_main_idx1 = _mm_extract_epi16(ref_main_idx_4x32b, 0); /* col=0*/ 22110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pi2_ref_main_idx2 = _mm_extract_epi16(ref_main_idx_4x32b, 1); /* col=1*/ 22120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pi2_ref_main_idx3 = _mm_extract_epi16(ref_main_idx_4x32b, 2); /* col=2*/ 22130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pi2_ref_main_idx4 = _mm_extract_epi16(ref_main_idx_4x32b, 3); /* col=3*/ 22140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 22150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pi2_ref_main_idx5 = _mm_extract_epi16(ref_main_idx_4x32b, 4); /* col=5*/ 22160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pi2_ref_main_idx6 = _mm_extract_epi16(ref_main_idx_4x32b, 5); /* col=6*/ 22170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pi2_ref_main_idx7 = _mm_extract_epi16(ref_main_idx_4x32b, 6); /* col=7*/ 22180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pi2_ref_main_idx8 = _mm_extract_epi16(ref_main_idx_4x32b, 7); /* col=8*/ 22190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 22200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar for(row = 0; row < nt; row += 8) 22210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 22220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i src_temp1_8x16b, src_temp2_8x16b, src_temp3_8x16b, src_temp4_8x16b; 22230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i 
src_temp5_8x16b, src_temp6_8x16b, src_temp7_8x16b, src_temp8_8x16b; 22240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 22250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 22260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i src_temp11_8x16b, src_temp12_8x16b, src_temp13_8x16b, src_temp14_8x16b; 22270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i src_temp15_8x16b, src_temp16_8x16b, src_temp17_8x16b, src_temp18_8x16b; 22280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 22290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* loding 8-bit 16 pixels */ 22300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp5_8x16b = _mm_loadu_si128((__m128i *)(ref_main + pi2_ref_main_idx1 + row)); /* col=0*/ 22310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp6_8x16b = _mm_loadu_si128((__m128i *)(ref_main + pi2_ref_main_idx2 + row)); /* col=1*/ 22320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp7_8x16b = _mm_loadu_si128((__m128i *)(ref_main + pi2_ref_main_idx3 + row)); /* col=2*/ 22330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp8_8x16b = _mm_loadu_si128((__m128i *)(ref_main + pi2_ref_main_idx4 + row)); /* col=3*/ 22340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 22350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp1_8x16b = _mm_srli_si128(src_temp5_8x16b, 1); /* col=0*/ 22360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2_8x16b = _mm_srli_si128(src_temp6_8x16b, 1); /* col=1*/ 22370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp3_8x16b = _mm_srli_si128(src_temp7_8x16b, 1); /* col=2*/ 22380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp4_8x16b = _mm_srli_si128(src_temp8_8x16b, 1); /* col=3*/ 22390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 22400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* loding 8-bit 16 pixels */ 
22410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp15_8x16b = _mm_loadu_si128((__m128i *)(ref_main + pi2_ref_main_idx5 + row)); /* col=5*/ 22420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp16_8x16b = _mm_loadu_si128((__m128i *)(ref_main + pi2_ref_main_idx6 + row)); /* col=6*/ 22430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp17_8x16b = _mm_loadu_si128((__m128i *)(ref_main + pi2_ref_main_idx7 + row)); /* col=7*/ 22440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp18_8x16b = _mm_loadu_si128((__m128i *)(ref_main + pi2_ref_main_idx8 + row)); /* col=8*/ 22450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 22460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp1_8x16b = _mm_shuffle_epi8(src_temp5_8x16b, sm3); /* col=0*/ 22470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2_8x16b = _mm_shuffle_epi8(src_temp6_8x16b, sm3); /* col=1*/ 22480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp3_8x16b = _mm_shuffle_epi8(src_temp7_8x16b, sm3); /* col=2*/ 22490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp4_8x16b = _mm_shuffle_epi8(src_temp8_8x16b, sm3); /* col=3*/ 22500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 22510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp11_8x16b = _mm_shuffle_epi8(src_temp15_8x16b, sm3); /* col=0*/ 22520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp12_8x16b = _mm_shuffle_epi8(src_temp16_8x16b, sm3); /* col=1*/ 22530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp13_8x16b = _mm_shuffle_epi8(src_temp17_8x16b, sm3); /* col=2*/ 22540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp14_8x16b = _mm_shuffle_epi8(src_temp18_8x16b, sm3); /* col=3*/ 22550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 22560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* fract*(pu1_ref[ref_main_idx + 1]- 
pu1_ref[ref_main_idx]) */ 22570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp1_8x16b = _mm_maddubs_epi16(src_temp1_8x16b, temp1_8x16b); 22580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2_8x16b = _mm_maddubs_epi16(src_temp2_8x16b, temp2_8x16b); 22590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp3_8x16b = _mm_maddubs_epi16(src_temp3_8x16b, temp3_8x16b); 22600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp4_8x16b = _mm_maddubs_epi16(src_temp4_8x16b, temp4_8x16b); 22610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 22620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* fract*(pu1_ref[ref_main_idx + 1]- pu1_ref[ref_main_idx]) */ 22630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp11_8x16b = _mm_maddubs_epi16(src_temp11_8x16b, temp11_8x16b); 22640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp12_8x16b = _mm_maddubs_epi16(src_temp12_8x16b, temp12_8x16b); 22650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp13_8x16b = _mm_maddubs_epi16(src_temp13_8x16b, temp13_8x16b); 22660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp14_8x16b = _mm_maddubs_epi16(src_temp14_8x16b, temp14_8x16b); 22670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 22680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /*((32 - fract)* pu1_ref[ref_main_idx]+ fract * pu1_ref[ref_main_idx + 1] + 16)*/ 22690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp1_8x16b = _mm_add_epi16(src_temp1_8x16b, const_temp_4x32b); 22700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2_8x16b = _mm_add_epi16(src_temp2_8x16b, const_temp_4x32b); 22710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp3_8x16b = _mm_add_epi16(src_temp3_8x16b, const_temp_4x32b); 22720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp4_8x16b = _mm_add_epi16(src_temp4_8x16b, const_temp_4x32b); 
22730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 22740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /*((32 - fract)* pu1_ref[ref_main_idx]+ fract * pu1_ref[ref_main_idx + 1] + 16) >>5*/ 22750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp1_8x16b = _mm_srai_epi16(src_temp1_8x16b, 5); /* col=0*/ 22760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2_8x16b = _mm_srai_epi16(src_temp2_8x16b, 5); /* col=1*/ 22770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp3_8x16b = _mm_srai_epi16(src_temp3_8x16b, 5); /* col=2*/ 22780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp4_8x16b = _mm_srai_epi16(src_temp4_8x16b, 5); /* col=3*/ 22790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 22800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /*((32 - fract)* pu1_ref[ref_main_idx]+ fract * pu1_ref[ref_main_idx + 1] + 16)*/ 22810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp11_8x16b = _mm_add_epi16(src_temp11_8x16b, const_temp_4x32b); 22820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp12_8x16b = _mm_add_epi16(src_temp12_8x16b, const_temp_4x32b); 22830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp13_8x16b = _mm_add_epi16(src_temp13_8x16b, const_temp_4x32b); 22840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp14_8x16b = _mm_add_epi16(src_temp14_8x16b, const_temp_4x32b); 22850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 22860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /*((32 - fract)* pu1_ref[ref_main_idx]+ fract * pu1_ref[ref_main_idx + 1] + 16) >>5*/ 22870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp11_8x16b = _mm_srai_epi16(src_temp11_8x16b, 5); /* col=5*/ 22880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp12_8x16b = _mm_srai_epi16(src_temp12_8x16b, 5); /* col=6*/ 22890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 
src_temp13_8x16b = _mm_srai_epi16(src_temp13_8x16b, 5); /* col=7*/ 22900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp14_8x16b = _mm_srai_epi16(src_temp14_8x16b, 5); /* col=8*/ 22910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 22920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* converting 16 bit to 8 bit */ 22930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp1_8x16b = _mm_packus_epi16(src_temp1_8x16b, src_temp3_8x16b); /* col=0*/ 22940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2_8x16b = _mm_packus_epi16(src_temp2_8x16b, src_temp4_8x16b); /* col=1*/ 22950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 22960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* converting 16 bit to 8 bit */ 22970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp11_8x16b = _mm_packus_epi16(src_temp11_8x16b, src_temp13_8x16b); /* col=5*/ 22980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp12_8x16b = _mm_packus_epi16(src_temp12_8x16b, src_temp14_8x16b); /* col=6*/ 22990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 23000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp5_8x16b = _mm_unpacklo_epi8(src_temp1_8x16b, src_temp2_8x16b); 23010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp6_8x16b = _mm_unpackhi_epi8(src_temp1_8x16b, src_temp2_8x16b); 23020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 23030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp15_8x16b = _mm_unpacklo_epi8(src_temp11_8x16b, src_temp12_8x16b); 23040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp16_8x16b = _mm_unpackhi_epi8(src_temp11_8x16b, src_temp12_8x16b); 23050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 23060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp7_8x16b = _mm_unpacklo_epi16(src_temp5_8x16b, src_temp6_8x16b); 
23070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp8_8x16b = _mm_unpackhi_epi16(src_temp5_8x16b, src_temp6_8x16b); 23080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 23090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp17_8x16b = _mm_unpacklo_epi16(src_temp15_8x16b, src_temp16_8x16b); 23100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp18_8x16b = _mm_unpackhi_epi16(src_temp15_8x16b, src_temp16_8x16b); 23110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 23120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 23130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp1_8x16b = _mm_unpacklo_epi32(src_temp7_8x16b, src_temp17_8x16b); 23140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2_8x16b = _mm_unpackhi_epi32(src_temp7_8x16b, src_temp17_8x16b); 23150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 23160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp3_8x16b = _mm_unpacklo_epi32(src_temp8_8x16b, src_temp18_8x16b); 23170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp4_8x16b = _mm_unpackhi_epi32(src_temp8_8x16b, src_temp18_8x16b); 23180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 23190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp5_8x16b = _mm_srli_si128(src_temp1_8x16b, 8); 23200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp6_8x16b = _mm_srli_si128(src_temp2_8x16b, 8); 23210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp7_8x16b = _mm_srli_si128(src_temp3_8x16b, 8); 23220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp8_8x16b = _mm_srli_si128(src_temp4_8x16b, 8); 23230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 23240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storel_epi64((__m128i *)(pu1_dst + col + (dst_strd * row)), src_temp1_8x16b); /* row=0*/ 
23250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 23260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storel_epi64((__m128i *)(pu1_dst + col + (dst_strd * (row + 1))), src_temp5_8x16b); /* row=1*/ 23270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 23280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storel_epi64((__m128i *)(pu1_dst + col + (dst_strd * (row + 2))), src_temp2_8x16b); /* row=2*/ 23290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 23300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storel_epi64((__m128i *)(pu1_dst + col + (dst_strd * (row + 3))), src_temp6_8x16b); /* row=4*/ 23310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 23320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storel_epi64((__m128i *)(pu1_dst + col + (dst_strd * (row + 4))), src_temp3_8x16b); /* row=5*/ 23330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 23340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storel_epi64((__m128i *)(pu1_dst + col + (dst_strd * (row + 5))), src_temp7_8x16b); /* row=6*/ 23350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 23360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storel_epi64((__m128i *)(pu1_dst + col + (dst_strd * (row + 6))), src_temp4_8x16b); /* row=7*/ 23370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 23380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storel_epi64((__m128i *)(pu1_dst + col + (dst_strd * (row + 7))), src_temp8_8x16b); /* row=8*/ 23390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 23400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 23410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 23420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 23430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar else if(nt == 16) 23440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 
23450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 23460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i temp1, temp2, temp11, src_values0; 23470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* Intermediate reference samples for negative angle modes */ 23480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* For horizontal modes, (ref main = ref above) (ref side = ref left) */ 23490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ref_temp[two_nt - 1] = pu1_ref[two_nt - nt]; 23500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp1 = _mm_loadu_si128((__m128i *)(pu1_ref + nt + 1)); 23510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp2 = _mm_loadu_si128((__m128i *)IHEVCE_SHUFFLEMASKY3); 23520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values0 = _mm_loadu_si128((__m128i *)(pu1_ref + two_nt + 1)); /*nt-(nt+15)*/ 23530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 23540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp11 = _mm_loadu_si128((__m128i *)(inv_angle_shuffle[17 - mode] + 16)); 23550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 23560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values0 = _mm_shuffle_epi8(src_values0, temp2); 23570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp1 = _mm_shuffle_epi8(temp1, temp2); 23580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values0 = _mm_shuffle_epi8(src_values0, temp11); 23590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 23600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(ref_main - nt), src_values0); 23610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(ref_temp + nt - 1), temp1); 23620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 23630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar intra_pred_ang_4x32b = 
_mm_set1_epi16(intra_pred_ang); 23640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar row_4x32b = _mm_set_epi16(8, 7, 6, 5, 4, 3, 2, 1); 23650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar const_temp2_4x32b = _mm_set1_epi16(31); 23660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar const_temp4_4x32b = _mm_set1_epi16(8); 23670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar const_temp3_4x32b = _mm_set1_epi16(32); 23680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar two_nt_4x32b = _mm_set1_epi16(1); 23690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 23700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar for(col = 0; col < nt; col += 8) 23710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 23720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD16 pi2_ref_main_idx1, pi2_ref_main_idx2, pi2_ref_main_idx3, pi2_ref_main_idx4; 23730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD16 pi2_ref_main_idx5, pi2_ref_main_idx6, pi2_ref_main_idx7, pi2_ref_main_idx8; 23740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar // WORD8 ai1_fract_temp0_val[16], ai1_fract_temp1_val[16]; 23750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 23760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i fract1_8x16b, fract2_8x16b, fract3_8x16b, fract8_8x16b; 23770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 23780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i temp1_8x16b, temp2_8x16b, temp3_8x16b, temp4_8x16b; 23790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i temp11_8x16b, temp12_8x16b, temp13_8x16b, temp14_8x16b; 23800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 23810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* pos = ((row + 1) * intra_pred_ang); */ 23820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar res_temp5_4x32b = _mm_mullo_epi16(row_4x32b, intra_pred_ang_4x32b); 
23830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 23840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* idx = pos >> 5; */ 23850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar fract_4x32b = _mm_and_si128(res_temp5_4x32b, const_temp2_4x32b); 23860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 23870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* fract = pos & (31); */ 23880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ref_main_idx_4x32b = _mm_add_epi16(two_nt_4x32b, _mm_srai_epi16(res_temp5_4x32b, 5)); 23890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 23900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar row_4x32b = _mm_add_epi16(row_4x32b, const_temp4_4x32b); 23910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /*(32 - fract) */ 23920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar fract2_8x16b = _mm_sub_epi16(const_temp3_4x32b, fract_4x32b); 23930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 23940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar fract1_8x16b = _mm_slli_epi16(fract_4x32b, 8); 23950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar fract3_8x16b = _mm_slli_epi16(fract2_8x16b, 8); /*(32 - fract) */ 23960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 23970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar fract_4x32b = _mm_or_si128(fract_4x32b, fract1_8x16b); 23980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar fract2_8x16b = _mm_or_si128(fract2_8x16b, fract3_8x16b); /*(32 - fract) */ 23990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 24000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 24010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar fract8_8x16b = _mm_unpackhi_epi8(fract_4x32b, fract2_8x16b); 24020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar fract_4x32b = _mm_unpacklo_epi8(fract_4x32b, fract2_8x16b); 
24030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 24040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp1_8x16b = _mm_shuffle_epi32(fract_4x32b, 0x00); 24050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp2_8x16b = _mm_shuffle_epi32(fract_4x32b, 0x55); 24060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp3_8x16b = _mm_shuffle_epi32(fract_4x32b, 0xaa); 24070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp4_8x16b = _mm_shuffle_epi32(fract_4x32b, 0xff); 24080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 24090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp11_8x16b = _mm_shuffle_epi32(fract8_8x16b, 0x00); 24100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp12_8x16b = _mm_shuffle_epi32(fract8_8x16b, 0x55); 24110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp13_8x16b = _mm_shuffle_epi32(fract8_8x16b, 0xaa); 24120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp14_8x16b = _mm_shuffle_epi32(fract8_8x16b, 0xff); 24130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 24140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pi2_ref_main_idx1 = _mm_extract_epi16(ref_main_idx_4x32b, 0); /* col=0*/ 24150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pi2_ref_main_idx2 = _mm_extract_epi16(ref_main_idx_4x32b, 1); /* col=1*/ 24160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pi2_ref_main_idx3 = _mm_extract_epi16(ref_main_idx_4x32b, 2); /* col=2*/ 24170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pi2_ref_main_idx4 = _mm_extract_epi16(ref_main_idx_4x32b, 3); /* col=3*/ 24180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 24190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pi2_ref_main_idx5 = _mm_extract_epi16(ref_main_idx_4x32b, 4); /* col=5*/ 24200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pi2_ref_main_idx6 = _mm_extract_epi16(ref_main_idx_4x32b, 5); /* col=6*/ 
24210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pi2_ref_main_idx7 = _mm_extract_epi16(ref_main_idx_4x32b, 6); /* col=7*/ 24220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pi2_ref_main_idx8 = _mm_extract_epi16(ref_main_idx_4x32b, 7); /* col=8*/ 24230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 24240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar for(row = 0; row < nt; row += 8) 24250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 24260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i src_temp1_8x16b, src_temp2_8x16b, src_temp3_8x16b, src_temp4_8x16b; 24270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i src_temp5_8x16b, src_temp6_8x16b, src_temp7_8x16b, src_temp8_8x16b; 24280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 24290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 24300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i src_temp11_8x16b, src_temp12_8x16b, src_temp13_8x16b, src_temp14_8x16b; 24310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i src_temp15_8x16b, src_temp16_8x16b, src_temp17_8x16b, src_temp18_8x16b; 24320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 24330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* loding 8-bit 16 pixels */ 24340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp5_8x16b = _mm_loadu_si128((__m128i *)(ref_main + pi2_ref_main_idx1 + row)); /* col=0*/ 24350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp6_8x16b = _mm_loadu_si128((__m128i *)(ref_main + pi2_ref_main_idx2 + row)); /* col=1*/ 24360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp7_8x16b = _mm_loadu_si128((__m128i *)(ref_main + pi2_ref_main_idx3 + row)); /* col=2*/ 24370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp8_8x16b = _mm_loadu_si128((__m128i *)(ref_main + pi2_ref_main_idx4 + row)); /* col=3*/ 
24380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 24390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp1_8x16b = _mm_srli_si128(src_temp5_8x16b, 1); /* col=0*/ 24400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2_8x16b = _mm_srli_si128(src_temp6_8x16b, 1); /* col=1*/ 24410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp3_8x16b = _mm_srli_si128(src_temp7_8x16b, 1); /* col=2*/ 24420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp4_8x16b = _mm_srli_si128(src_temp8_8x16b, 1); /* col=3*/ 24430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 24440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* loding 8-bit 16 pixels */ 24450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp15_8x16b = _mm_loadu_si128((__m128i *)(ref_main + pi2_ref_main_idx5 + row)); /* col=5*/ 24460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp16_8x16b = _mm_loadu_si128((__m128i *)(ref_main + pi2_ref_main_idx6 + row)); /* col=6*/ 24470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp17_8x16b = _mm_loadu_si128((__m128i *)(ref_main + pi2_ref_main_idx7 + row)); /* col=7*/ 24480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp18_8x16b = _mm_loadu_si128((__m128i *)(ref_main + pi2_ref_main_idx8 + row)); /* col=8*/ 24490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 24500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp1_8x16b = _mm_shuffle_epi8(src_temp5_8x16b, sm3); /* col=0*/ 24510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2_8x16b = _mm_shuffle_epi8(src_temp6_8x16b, sm3); /* col=1*/ 24520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp3_8x16b = _mm_shuffle_epi8(src_temp7_8x16b, sm3); /* col=2*/ 24530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp4_8x16b = _mm_shuffle_epi8(src_temp8_8x16b, sm3); /* col=3*/ 
24540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 24550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp11_8x16b = _mm_shuffle_epi8(src_temp15_8x16b, sm3); /* col=0*/ 24560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp12_8x16b = _mm_shuffle_epi8(src_temp16_8x16b, sm3); /* col=1*/ 24570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp13_8x16b = _mm_shuffle_epi8(src_temp17_8x16b, sm3); /* col=2*/ 24580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp14_8x16b = _mm_shuffle_epi8(src_temp18_8x16b, sm3); /* col=3*/ 24590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 24600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* fract*(pu1_ref[ref_main_idx + 1]- pu1_ref[ref_main_idx]) */ 24610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp1_8x16b = _mm_maddubs_epi16(src_temp1_8x16b, temp1_8x16b); 24620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2_8x16b = _mm_maddubs_epi16(src_temp2_8x16b, temp2_8x16b); 24630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp3_8x16b = _mm_maddubs_epi16(src_temp3_8x16b, temp3_8x16b); 24640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp4_8x16b = _mm_maddubs_epi16(src_temp4_8x16b, temp4_8x16b); 24650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 24660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* fract*(pu1_ref[ref_main_idx + 1]- pu1_ref[ref_main_idx]) */ 24670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp11_8x16b = _mm_maddubs_epi16(src_temp11_8x16b, temp11_8x16b); 24680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp12_8x16b = _mm_maddubs_epi16(src_temp12_8x16b, temp12_8x16b); 24690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp13_8x16b = _mm_maddubs_epi16(src_temp13_8x16b, temp13_8x16b); 24700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp14_8x16b = 
_mm_maddubs_epi16(src_temp14_8x16b, temp14_8x16b); 24710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 24720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /*((32 - fract)* pu1_ref[ref_main_idx]+ fract * pu1_ref[ref_main_idx + 1] + 16)*/ 24730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp1_8x16b = _mm_add_epi16(src_temp1_8x16b, const_temp_4x32b); 24740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2_8x16b = _mm_add_epi16(src_temp2_8x16b, const_temp_4x32b); 24750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp3_8x16b = _mm_add_epi16(src_temp3_8x16b, const_temp_4x32b); 24760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp4_8x16b = _mm_add_epi16(src_temp4_8x16b, const_temp_4x32b); 24770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 24780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /*((32 - fract)* pu1_ref[ref_main_idx]+ fract * pu1_ref[ref_main_idx + 1] + 16) >>5*/ 24790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp1_8x16b = _mm_srai_epi16(src_temp1_8x16b, 5); /* col=0*/ 24800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2_8x16b = _mm_srai_epi16(src_temp2_8x16b, 5); /* col=1*/ 24810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp3_8x16b = _mm_srai_epi16(src_temp3_8x16b, 5); /* col=2*/ 24820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp4_8x16b = _mm_srai_epi16(src_temp4_8x16b, 5); /* col=3*/ 24830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 24840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /*((32 - fract)* pu1_ref[ref_main_idx]+ fract * pu1_ref[ref_main_idx + 1] + 16)*/ 24850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp11_8x16b = _mm_add_epi16(src_temp11_8x16b, const_temp_4x32b); 24860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp12_8x16b = _mm_add_epi16(src_temp12_8x16b, const_temp_4x32b); 
24870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp13_8x16b = _mm_add_epi16(src_temp13_8x16b, const_temp_4x32b); 24880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp14_8x16b = _mm_add_epi16(src_temp14_8x16b, const_temp_4x32b); 24890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 24900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /*((32 - fract)* pu1_ref[ref_main_idx]+ fract * pu1_ref[ref_main_idx + 1] + 16) >>5*/ 24910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp11_8x16b = _mm_srai_epi16(src_temp11_8x16b, 5); /* col=5*/ 24920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp12_8x16b = _mm_srai_epi16(src_temp12_8x16b, 5); /* col=6*/ 24930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp13_8x16b = _mm_srai_epi16(src_temp13_8x16b, 5); /* col=7*/ 24940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp14_8x16b = _mm_srai_epi16(src_temp14_8x16b, 5); /* col=8*/ 24950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 24960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* converting 16 bit to 8 bit */ 24970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp1_8x16b = _mm_packus_epi16(src_temp1_8x16b, src_temp3_8x16b); /* col=0*/ 24980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2_8x16b = _mm_packus_epi16(src_temp2_8x16b, src_temp4_8x16b); /* col=1*/ 24990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 25000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* converting 16 bit to 8 bit */ 25010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp11_8x16b = _mm_packus_epi16(src_temp11_8x16b, src_temp13_8x16b); /* col=5*/ 25020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp12_8x16b = _mm_packus_epi16(src_temp12_8x16b, src_temp14_8x16b); /* col=6*/ 25030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 
25040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp5_8x16b = _mm_unpacklo_epi8(src_temp1_8x16b, src_temp2_8x16b); 25050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp6_8x16b = _mm_unpackhi_epi8(src_temp1_8x16b, src_temp2_8x16b); 25060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 25070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp15_8x16b = _mm_unpacklo_epi8(src_temp11_8x16b, src_temp12_8x16b); 25080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp16_8x16b = _mm_unpackhi_epi8(src_temp11_8x16b, src_temp12_8x16b); 25090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 25100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp7_8x16b = _mm_unpacklo_epi16(src_temp5_8x16b, src_temp6_8x16b); 25110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp8_8x16b = _mm_unpackhi_epi16(src_temp5_8x16b, src_temp6_8x16b); 25120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 25130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp17_8x16b = _mm_unpacklo_epi16(src_temp15_8x16b, src_temp16_8x16b); 25140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp18_8x16b = _mm_unpackhi_epi16(src_temp15_8x16b, src_temp16_8x16b); 25150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 25160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 25170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp1_8x16b = _mm_unpacklo_epi32(src_temp7_8x16b, src_temp17_8x16b); 25180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2_8x16b = _mm_unpackhi_epi32(src_temp7_8x16b, src_temp17_8x16b); 25190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 25200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp3_8x16b = _mm_unpacklo_epi32(src_temp8_8x16b, src_temp18_8x16b); 25210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp4_8x16b = _mm_unpackhi_epi32(src_temp8_8x16b, 
src_temp18_8x16b); 25220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 25230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp5_8x16b = _mm_srli_si128(src_temp1_8x16b, 8); 25240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp6_8x16b = _mm_srli_si128(src_temp2_8x16b, 8); 25250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp7_8x16b = _mm_srli_si128(src_temp3_8x16b, 8); 25260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp8_8x16b = _mm_srli_si128(src_temp4_8x16b, 8); 25270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 25280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storel_epi64((__m128i *)(pu1_dst + col + (dst_strd * row)), src_temp1_8x16b); /* row=0*/ 25290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 25300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storel_epi64((__m128i *)(pu1_dst + col + (dst_strd * (row + 1))), src_temp5_8x16b); /* row=1*/ 25310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 25320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storel_epi64((__m128i *)(pu1_dst + col + (dst_strd * (row + 2))), src_temp2_8x16b); /* row=2*/ 25330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 25340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storel_epi64((__m128i *)(pu1_dst + col + (dst_strd * (row + 3))), src_temp6_8x16b); /* row=4*/ 25350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 25360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storel_epi64((__m128i *)(pu1_dst + col + (dst_strd * (row + 4))), src_temp3_8x16b); /* row=5*/ 25370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 25380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storel_epi64((__m128i *)(pu1_dst + col + (dst_strd * (row + 5))), src_temp7_8x16b); /* row=6*/ 25390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 
25400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storel_epi64((__m128i *)(pu1_dst + col + (dst_strd * (row + 6))), src_temp4_8x16b); /* row=7*/ 25410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 25420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storel_epi64((__m128i *)(pu1_dst + col + (dst_strd * (row + 7))), src_temp8_8x16b); /* row=8*/ 25430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 25440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 25450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 25460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 25470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar else 25480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 25490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 25500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 25510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i temp1, temp2, temp11, src_values0; 25520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* Intermediate reference samples for negative angle modes */ 25530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* For horizontal modes, (ref main = ref above) (ref side = ref left) */ 25540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ref_temp[two_nt - 1] = pu1_ref[nt]; 25550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp1 = _mm_loadu_si128((__m128i *)(pu1_ref + 1)); 25560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 25570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* For negative angled derive the main reference samples from side */ 25580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 25590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values0 = _mm_loadu_si128((__m128i *)(pu1_ref + two_nt + 1)); /*nt-(nt+15)*/ 25600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp2 = _mm_loadu_si128((__m128i 
*)IHEVCE_SHUFFLEMASKY3); 25610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp11 = _mm_loadu_si128((__m128i *)(inv_angle_shuffle[17 - mode] + 16)); 25620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 25630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values0 = _mm_shuffle_epi8(src_values0, temp2); 25640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp1 = _mm_shuffle_epi8(temp1, temp2); 25650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values0 = _mm_shuffle_epi8(src_values0, temp11); 25660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values0 = _mm_srli_si128(src_values0, 8); 25670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 25680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storel_epi64((__m128i *)(ref_temp + nt - 1), temp1); 25690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storel_epi64((__m128i *)(ref_main - nt), src_values0); 25700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 25710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 25720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar intra_pred_ang_4x32b = _mm_set1_epi16(intra_pred_ang); 25730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar row_4x32b = _mm_set_epi16(8, 7, 6, 5, 4, 3, 2, 1); 25740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar const_temp2_4x32b = _mm_set1_epi16(31); 25750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar const_temp4_4x32b = _mm_set1_epi16(8); 25760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar const_temp3_4x32b = _mm_set1_epi16(32); 25770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar two_nt_4x32b = _mm_set1_epi16(1); 25780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 25790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 25800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD16 pi2_ref_main_idx1, pi2_ref_main_idx2, pi2_ref_main_idx3, 
pi2_ref_main_idx4; 25810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD16 pi2_ref_main_idx5, pi2_ref_main_idx6, pi2_ref_main_idx7, pi2_ref_main_idx8; 25820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar //WORD8 ai1_fract_temp0_val[16], ai1_fract_temp1_val[16]; 25830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 25840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i fract1_8x16b, fract2_8x16b, fract3_8x16b, fract8_8x16b; 25850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 25860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i temp1_8x16b, temp2_8x16b, temp3_8x16b, temp4_8x16b; 25870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i temp11_8x16b, temp12_8x16b, temp13_8x16b, temp14_8x16b; 25880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 25890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* pos = ((row + 1) * intra_pred_ang); */ 25900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar res_temp5_4x32b = _mm_mullo_epi16(row_4x32b, intra_pred_ang_4x32b); 25910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 25920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* idx = pos >> 5; */ 25930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar fract_4x32b = _mm_and_si128(res_temp5_4x32b, const_temp2_4x32b); 25940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 25950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* fract = pos & (31); */ 25960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ref_main_idx_4x32b = _mm_add_epi16(two_nt_4x32b, _mm_srai_epi16(res_temp5_4x32b, 5)); 25970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 25980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /*(32 - fract) */ 25990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar fract2_8x16b = _mm_sub_epi16(const_temp3_4x32b, fract_4x32b); 26000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 
26010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar fract1_8x16b = _mm_slli_epi16(fract_4x32b, 8); 26020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar fract3_8x16b = _mm_slli_epi16(fract2_8x16b, 8); /*(32 - fract) */ 26030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 26040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar fract_4x32b = _mm_or_si128(fract_4x32b, fract1_8x16b); 26050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar fract2_8x16b = _mm_or_si128(fract2_8x16b, fract3_8x16b); /*(32 - fract) */ 26060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 26070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar fract8_8x16b = _mm_unpackhi_epi8(fract_4x32b, fract2_8x16b); 26080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar fract_4x32b = _mm_unpacklo_epi8(fract_4x32b, fract2_8x16b); 26090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 26100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp1_8x16b = _mm_shuffle_epi32(fract_4x32b, 0x00); 26110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp2_8x16b = _mm_shuffle_epi32(fract_4x32b, 0x55); 26120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp3_8x16b = _mm_shuffle_epi32(fract_4x32b, 0xaa); 26130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp4_8x16b = _mm_shuffle_epi32(fract_4x32b, 0xff); 26140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 26150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp11_8x16b = _mm_shuffle_epi32(fract8_8x16b, 0x00); 26160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp12_8x16b = _mm_shuffle_epi32(fract8_8x16b, 0x55); 26170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp13_8x16b = _mm_shuffle_epi32(fract8_8x16b, 0xaa); 26180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp14_8x16b = _mm_shuffle_epi32(fract8_8x16b, 0xff); 26190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 
26200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pi2_ref_main_idx1 = _mm_extract_epi16(ref_main_idx_4x32b, 0); /* col=0*/ 26210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pi2_ref_main_idx2 = _mm_extract_epi16(ref_main_idx_4x32b, 1); /* col=1*/ 26220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pi2_ref_main_idx3 = _mm_extract_epi16(ref_main_idx_4x32b, 2); /* col=2*/ 26230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pi2_ref_main_idx4 = _mm_extract_epi16(ref_main_idx_4x32b, 3); /* col=3*/ 26240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 26250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pi2_ref_main_idx5 = _mm_extract_epi16(ref_main_idx_4x32b, 4); /* col=5*/ 26260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pi2_ref_main_idx6 = _mm_extract_epi16(ref_main_idx_4x32b, 5); /* col=6*/ 26270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pi2_ref_main_idx7 = _mm_extract_epi16(ref_main_idx_4x32b, 6); /* col=7*/ 26280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pi2_ref_main_idx8 = _mm_extract_epi16(ref_main_idx_4x32b, 7); /* col=8*/ 26290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 26300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 26310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i src_temp1_8x16b, src_temp2_8x16b, src_temp3_8x16b, src_temp4_8x16b; 26320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i src_temp5_8x16b, src_temp6_8x16b, src_temp7_8x16b, src_temp8_8x16b; 26330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 26340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i src_temp11_8x16b, src_temp12_8x16b, src_temp13_8x16b, src_temp14_8x16b; 26350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i src_temp15_8x16b, src_temp16_8x16b, src_temp17_8x16b, src_temp18_8x16b; 26360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 
26370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* loding 8-bit 16 pixels */ 26380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp5_8x16b = _mm_loadu_si128((__m128i *)(ref_main + pi2_ref_main_idx1)); /* col=0*/ 26390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp6_8x16b = _mm_loadu_si128((__m128i *)(ref_main + pi2_ref_main_idx2)); /* col=1*/ 26400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp7_8x16b = _mm_loadu_si128((__m128i *)(ref_main + pi2_ref_main_idx3)); /* col=2*/ 26410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp8_8x16b = _mm_loadu_si128((__m128i *)(ref_main + pi2_ref_main_idx4)); /* col=3*/ 26420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 26430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* loding 8-bit 16 pixels */ 26440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp15_8x16b = _mm_loadu_si128((__m128i *)(ref_main + pi2_ref_main_idx5)); /* col=5*/ 26450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp16_8x16b = _mm_loadu_si128((__m128i *)(ref_main + pi2_ref_main_idx6)); /* col=6*/ 26460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp17_8x16b = _mm_loadu_si128((__m128i *)(ref_main + pi2_ref_main_idx7)); /* col=7*/ 26470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp18_8x16b = _mm_loadu_si128((__m128i *)(ref_main + pi2_ref_main_idx8)); /* col=8*/ 26480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 26490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp1_8x16b = _mm_shuffle_epi8(src_temp5_8x16b, sm3); /* col=0*/ 26500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2_8x16b = _mm_shuffle_epi8(src_temp6_8x16b, sm3); /* col=1*/ 26510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp3_8x16b = _mm_shuffle_epi8(src_temp7_8x16b, sm3); /* col=2*/ 26520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 
src_temp4_8x16b = _mm_shuffle_epi8(src_temp8_8x16b, sm3); /* col=3*/ 26530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 26540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp11_8x16b = _mm_shuffle_epi8(src_temp15_8x16b, sm3); /* col=0*/ 26550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp12_8x16b = _mm_shuffle_epi8(src_temp16_8x16b, sm3); /* col=1*/ 26560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp13_8x16b = _mm_shuffle_epi8(src_temp17_8x16b, sm3); /* col=2*/ 26570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp14_8x16b = _mm_shuffle_epi8(src_temp18_8x16b, sm3); /* col=3*/ 26580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 26590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* fract*(pu1_ref[ref_main_idx + 1]- pu1_ref[ref_main_idx]) */ 26600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp1_8x16b = _mm_maddubs_epi16(src_temp1_8x16b, temp1_8x16b); 26610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2_8x16b = _mm_maddubs_epi16(src_temp2_8x16b, temp2_8x16b); 26620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp3_8x16b = _mm_maddubs_epi16(src_temp3_8x16b, temp3_8x16b); 26630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp4_8x16b = _mm_maddubs_epi16(src_temp4_8x16b, temp4_8x16b); 26640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 26650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* fract*(pu1_ref[ref_main_idx + 1]- pu1_ref[ref_main_idx]) */ 26660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp11_8x16b = _mm_maddubs_epi16(src_temp11_8x16b, temp11_8x16b); 26670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp12_8x16b = _mm_maddubs_epi16(src_temp12_8x16b, temp12_8x16b); 26680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp13_8x16b = _mm_maddubs_epi16(src_temp13_8x16b, temp13_8x16b); 
26690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp14_8x16b = _mm_maddubs_epi16(src_temp14_8x16b, temp14_8x16b); 26700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 26710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /*((32 - fract)* pu1_ref[ref_main_idx]+ fract * pu1_ref[ref_main_idx + 1] + 16)*/ 26720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp1_8x16b = _mm_add_epi16(src_temp1_8x16b, const_temp_4x32b); 26730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2_8x16b = _mm_add_epi16(src_temp2_8x16b, const_temp_4x32b); 26740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp3_8x16b = _mm_add_epi16(src_temp3_8x16b, const_temp_4x32b); 26750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp4_8x16b = _mm_add_epi16(src_temp4_8x16b, const_temp_4x32b); 26760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 26770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /*((32 - fract)* pu1_ref[ref_main_idx]+ fract * pu1_ref[ref_main_idx + 1] + 16) >>5*/ 26780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp1_8x16b = _mm_srai_epi16(src_temp1_8x16b, 5); /* row=0*/ 26790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2_8x16b = _mm_srai_epi16(src_temp2_8x16b, 5); /* row=1*/ 26800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp3_8x16b = _mm_srai_epi16(src_temp3_8x16b, 5); /* row=2*/ 26810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp4_8x16b = _mm_srai_epi16(src_temp4_8x16b, 5); /* row=3*/ 26820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 26830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /*((32 - fract)* pu1_ref[ref_main_idx]+ fract * pu1_ref[ref_main_idx + 1] + 16)*/ 26840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp11_8x16b = _mm_add_epi16(src_temp11_8x16b, const_temp_4x32b); 26850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 
src_temp12_8x16b = _mm_add_epi16(src_temp12_8x16b, const_temp_4x32b); 26860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp13_8x16b = _mm_add_epi16(src_temp13_8x16b, const_temp_4x32b); 26870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp14_8x16b = _mm_add_epi16(src_temp14_8x16b, const_temp_4x32b); 26880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 26890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /*((32 - fract)* pu1_ref[ref_main_idx]+ fract * pu1_ref[ref_main_idx + 1] + 16) >>5*/ 26900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp11_8x16b = _mm_srai_epi16(src_temp11_8x16b, 5); /* col=5*/ 26910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp12_8x16b = _mm_srai_epi16(src_temp12_8x16b, 5); /* col=6*/ 26920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp13_8x16b = _mm_srai_epi16(src_temp13_8x16b, 5); /* col=7*/ 26930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp14_8x16b = _mm_srai_epi16(src_temp14_8x16b, 5); /* col=8*/ 26940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 26950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* converting 16 bit to 8 bit */ 26960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp1_8x16b = _mm_packus_epi16(src_temp1_8x16b, src_temp3_8x16b); /* col=0*/ 26970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2_8x16b = _mm_packus_epi16(src_temp2_8x16b, src_temp4_8x16b); /* col=1*/ 26980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 26990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* converting 16 bit to 8 bit */ 27000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp11_8x16b = _mm_packus_epi16(src_temp11_8x16b, src_temp13_8x16b); /* col=4*/ 27010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp12_8x16b = _mm_packus_epi16(src_temp12_8x16b, src_temp14_8x16b); /* col=5*/ 
27020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 27030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp5_8x16b = _mm_unpacklo_epi8(src_temp1_8x16b, src_temp2_8x16b); 27040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp6_8x16b = _mm_unpackhi_epi8(src_temp1_8x16b, src_temp2_8x16b); 27050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 27060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp15_8x16b = _mm_unpacklo_epi8(src_temp11_8x16b, src_temp12_8x16b); 27070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp16_8x16b = _mm_unpackhi_epi8(src_temp11_8x16b, src_temp12_8x16b); 27080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 27090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp7_8x16b = _mm_unpacklo_epi16(src_temp5_8x16b, src_temp6_8x16b); 27100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp8_8x16b = _mm_unpackhi_epi16(src_temp5_8x16b, src_temp6_8x16b); 27110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 27120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp17_8x16b = _mm_unpacklo_epi16(src_temp15_8x16b, src_temp16_8x16b); 27130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp18_8x16b = _mm_unpackhi_epi16(src_temp15_8x16b, src_temp16_8x16b); 27140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 27150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 27160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp1_8x16b = _mm_unpacklo_epi32(src_temp7_8x16b, src_temp17_8x16b); 27170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp2_8x16b = _mm_unpackhi_epi32(src_temp7_8x16b, src_temp17_8x16b); 27180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 27190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp3_8x16b = _mm_unpacklo_epi32(src_temp8_8x16b, src_temp18_8x16b); 27200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 
src_temp4_8x16b = _mm_unpackhi_epi32(src_temp8_8x16b, src_temp18_8x16b); 27210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 27220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp5_8x16b = _mm_srli_si128(src_temp1_8x16b, 8); 27230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp6_8x16b = _mm_srli_si128(src_temp2_8x16b, 8); 27240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp7_8x16b = _mm_srli_si128(src_temp3_8x16b, 8); 27250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_temp8_8x16b = _mm_srli_si128(src_temp4_8x16b, 8); 27260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 27270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storel_epi64((__m128i *)(pu1_dst + (dst_strd * 0)), src_temp1_8x16b); /* row=0*/ 27280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 27290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storel_epi64((__m128i *)(pu1_dst + (dst_strd * (1))), src_temp5_8x16b); /* row=1*/ 27300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 27310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storel_epi64((__m128i *)(pu1_dst + (dst_strd * (2))), src_temp2_8x16b); /* row=2*/ 27320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 27330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storel_epi64((__m128i *)(pu1_dst + (dst_strd * (3))), src_temp6_8x16b); /* row=3*/ 27340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 27350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storel_epi64((__m128i *)(pu1_dst + (dst_strd * (4))), src_temp3_8x16b); /* row=4*/ 27360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 27370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storel_epi64((__m128i *)(pu1_dst + (dst_strd * (5))), src_temp7_8x16b); /* row=5*/ 27380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 27390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish 
Mahendrakar _mm_storel_epi64((__m128i *)(pu1_dst + (dst_strd * (6))), src_temp4_8x16b); /* row=6*/ 27400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 27410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storel_epi64((__m128i *)(pu1_dst + (dst_strd * (7))), src_temp8_8x16b); /* row=7*/ 27420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 27430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 27440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 27450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 27460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 27470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar} 27480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 27490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 27500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 27510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar/** 27520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar******************************************************************************* 27530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 27540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @brief 27550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* Intra prediction interpolation filter for luma mode 19 to mode 25 27560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 27570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @par Description: 27580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* Intraprediction for mode 19 to 25 (negative angle, vertical mode ) with 27590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* reference neighboring samples location pointed by 'pu1_ref' to the TU 27600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* block location pointed by 'pu1_dst' 27610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 27620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish 
Mahendrakar* @param[in] pu1_ref 27630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* UWORD8 pointer to the reference neighboring samples 27640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 27650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[out] pu1_dst 27660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* UWORD8 pointer to the destination 27670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 27680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] src_strd 27690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* integer source stride (unused) 27700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 27710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] dst_strd 27720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* integer destination stride 27730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 27740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] nt 27750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* integer Transform Block size 27760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 27770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] mode 27780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* integer intra prediction mode 27790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 27800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @returns 27810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 27820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @remarks 27830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* None 27840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 27850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar******************************************************************************* 27860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar*/ 27870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
27880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 27890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarvoid ihevc_intra_pred_luma_mode_19_to_25_sse42(UWORD8 *pu1_ref, 27900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 src_strd, 27910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar UWORD8 *pu1_dst, 27920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 dst_strd, 27930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 nt, 27940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 mode) 27950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar{ 27960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 27970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 row, k; 27980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 two_nt, intra_pred_ang; 27990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 inv_ang, inv_ang_sum; 28000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar //WORD32 ref_main_idx, pos, fract, idx; 28010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 ref_idx; 28020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar UWORD8 ref_tmp[(2 * MAX_CU_SIZE) + 2]; 28030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar UWORD8 *ref_main, *ref_temp; 28040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 28050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i /*fract_8x16b,*/ const_temp_8x16b, sm3; 28060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i temp1, temp2, temp3, temp4; 28070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i temp11, temp12, temp13, temp14; 28080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar UNUSED(src_strd); 28090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 28100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar two_nt = 2 * nt; 28110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish 
Mahendrakar intra_pred_ang = gai4_ihevc_ang_table[mode]; 28120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar inv_ang = gai4_ihevc_inv_ang_table[mode - 12]; 28130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 28140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* Intermediate reference samples for negative angle modes */ 28150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* This have to be removed during optimization*/ 28160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* For horizontal modes, (ref main = ref above) (ref side = ref left) */ 28170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ref_temp = ref_tmp + 1; 28180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ref_main = ref_temp + nt - 1; 28190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 28200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 28210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar sm3 = _mm_load_si128((__m128i *)&IHEVCE_SHUFFLEMASKY11[0]); 28220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 28230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 28240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 28250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar const_temp_8x16b = _mm_set1_epi16(16); 28260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 28270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar if(nt == 32) 28280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 28290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 28300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i const_temp2_4x32b, const_temp3_4x32b, const_temp8_4x32b; 28310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i src_values10, src_values11, intra_pred_ang_4x32b; 28320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i row_4x32b, two_nt_4x32b, src_values12; 28330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish 
Mahendrakar 28340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i src_values0, src_values1, src_values2, src_values3; 28350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i src_values4, src_values5, src_values6, src_values7; 28360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 col = 0; 28370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 28380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* Intermediate reference samples for negative angle modes */ 28390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* This have to be removed during optimization*/ 28400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* For horizontal modes, (ref main = ref above) (ref side = ref left) */ 28410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ref_temp[two_nt - 1] = pu1_ref[two_nt + nt]; 28420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp1 = _mm_loadu_si128((__m128i *)(pu1_ref + two_nt)); 28430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp3 = _mm_loadu_si128((__m128i *)(pu1_ref + two_nt + 16)); 28440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 28450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* SIMD Optimization can be done using look-up table for the loop */ 28460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* For negative angled derive the main reference samples from side */ 28470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* reference samples refer to section 8.4.4.2.6 */ 28480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values0 = _mm_loadu_si128((__m128i *)(pu1_ref + two_nt - nt)); /*nt-(nt+15)*/ 28490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values1 = _mm_loadu_si128((__m128i *)(pu1_ref + two_nt - 16)); /*(nt+16)-(two_nt-1)*/ 28500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 28510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp11 = 
_mm_loadu_si128((__m128i *)(inv_angle_shuffle[mode - 19])); 28520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp12 = _mm_loadu_si128((__m128i *)(inv_angle_shuffle[mode - 19] + 16)); 28530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 28540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values0 = _mm_shuffle_epi8(src_values0, temp11); 28550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values1 = _mm_shuffle_epi8(src_values1, temp12); 28560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 28570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(ref_temp + nt - 1), temp1); 28580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(ref_temp + nt - 1 + 16), temp3); 28590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(ref_main - 16), src_values1); 28600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(ref_main - nt + inv_angle_shuffle[mode - 19][0]), src_values0); 28610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 28620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar const_temp2_4x32b = _mm_set1_epi16(31); 28630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar const_temp3_4x32b = _mm_set1_epi16(32); 28640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar const_temp8_4x32b = _mm_set1_epi16(8); 28650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 28660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar two_nt_4x32b = _mm_set1_epi16(1); 28670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 28680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* intra_pred_ang = gai4_ihevc_ang_table[mode]; */ 28690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar intra_pred_ang_4x32b = _mm_set1_epi16(intra_pred_ang); 28700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 
28710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar row_4x32b = _mm_set_epi16(8, 7, 6, 5, 4, 3, 2, 1); 28720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 28730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar for(row = 0; row < nt; row += 8) 28740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 28750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 28760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD16 ref_main_idx[9]; 28770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 28780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i res_temp5_4x32b; 28790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i fract1_8x16b, fract2_8x16b; 28800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 28810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* pos = ((row + 1) * intra_pred_ang); */ 28820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar res_temp5_4x32b = _mm_mullo_epi16(row_4x32b, intra_pred_ang_4x32b); 28830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 28840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* fract = pos & (31); */ 28850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values12 = _mm_add_epi16(two_nt_4x32b, _mm_srai_epi16(res_temp5_4x32b, 5)); 28860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 28870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* idx = pos >> 5; */ 28880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values11 = _mm_and_si128(res_temp5_4x32b, const_temp2_4x32b); 28890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 28900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /*(32 - fract) */ 28910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values10 = _mm_sub_epi16(const_temp3_4x32b, src_values11); 28920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 28930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish 
Mahendrakar fract1_8x16b = _mm_slli_epi16(src_values11, 8); 28940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar fract2_8x16b = _mm_slli_epi16(src_values10, 8); 28950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 28960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values11 = _mm_or_si128(src_values11, fract1_8x16b); 28970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values10 = _mm_or_si128(src_values10, fract2_8x16b); /*(32 - fract) */ 28980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 28990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar fract2_8x16b = _mm_unpackhi_epi8(src_values11, src_values10); 29000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar fract1_8x16b = _mm_unpacklo_epi8(src_values11, src_values10); 29010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 29020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp1 = _mm_shuffle_epi32(fract1_8x16b, 0x00); 29030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp2 = _mm_shuffle_epi32(fract1_8x16b, 0x55); 29040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp3 = _mm_shuffle_epi32(fract1_8x16b, 0xaa); 29050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp4 = _mm_shuffle_epi32(fract1_8x16b, 0xff); 29060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 29070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp11 = _mm_shuffle_epi32(fract2_8x16b, 0x00); 29080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp12 = _mm_shuffle_epi32(fract2_8x16b, 0x55); 29090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp13 = _mm_shuffle_epi32(fract2_8x16b, 0xaa); 29100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp14 = _mm_shuffle_epi32(fract2_8x16b, 0xff); 29110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 29120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar row_4x32b = _mm_add_epi16(row_4x32b, const_temp8_4x32b); 
29130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)ref_main_idx, src_values12); 29140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar for(col = 0; col < nt; col += 16) 29150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 29160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values0 = _mm_loadu_si128((__m128i *)(ref_main + ref_main_idx[0] + col)); 29170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values1 = _mm_loadu_si128((__m128i *)(ref_main + ref_main_idx[1] + col)); 29180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values2 = _mm_loadu_si128((__m128i *)(ref_main + ref_main_idx[2] + col)); 29190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values3 = _mm_loadu_si128((__m128i *)(ref_main + ref_main_idx[3] + col)); 29200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values4 = _mm_loadu_si128((__m128i *)(ref_main + ref_main_idx[0] + 8 + col)); 29210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values5 = _mm_loadu_si128((__m128i *)(ref_main + ref_main_idx[1] + 8 + col)); 29220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values6 = _mm_loadu_si128((__m128i *)(ref_main + ref_main_idx[2] + 8 + col)); 29230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values7 = _mm_loadu_si128((__m128i *)(ref_main + ref_main_idx[3] + 8 + col)); 29240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 29250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values0 = _mm_shuffle_epi8(src_values0, sm3); 29260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values1 = _mm_shuffle_epi8(src_values1, sm3); 29270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values2 = _mm_shuffle_epi8(src_values2, sm3); 29280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values3 = _mm_shuffle_epi8(src_values3, sm3); 29290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish 
Mahendrakar src_values4 = _mm_shuffle_epi8(src_values4, sm3); 29300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values5 = _mm_shuffle_epi8(src_values5, sm3); 29310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values6 = _mm_shuffle_epi8(src_values6, sm3); 29320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values7 = _mm_shuffle_epi8(src_values7, sm3); 29330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 29340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 29350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values0 = _mm_maddubs_epi16(src_values0, temp1); 29360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values1 = _mm_maddubs_epi16(src_values1, temp2); 29370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values2 = _mm_maddubs_epi16(src_values2, temp3); 29380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values3 = _mm_maddubs_epi16(src_values3, temp4); 29390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values4 = _mm_maddubs_epi16(src_values4, temp1); 29400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values5 = _mm_maddubs_epi16(src_values5, temp2); 29410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values6 = _mm_maddubs_epi16(src_values6, temp3); 29420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values7 = _mm_maddubs_epi16(src_values7, temp4); 29430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 29440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /*((32 - fract)* pu1_ref[ref_main_idx]+ fract * pu1_ref[ref_main_idx + 1] + 16)*/ 29450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values0 = _mm_add_epi16(src_values0, const_temp_8x16b); 29460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values1 = _mm_add_epi16(src_values1, const_temp_8x16b); 29470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values2 = 
_mm_add_epi16(src_values2, const_temp_8x16b); 29480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values3 = _mm_add_epi16(src_values3, const_temp_8x16b); 29490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values4 = _mm_add_epi16(src_values4, const_temp_8x16b); 29500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values5 = _mm_add_epi16(src_values5, const_temp_8x16b); 29510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values6 = _mm_add_epi16(src_values6, const_temp_8x16b); 29520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values7 = _mm_add_epi16(src_values7, const_temp_8x16b); 29530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 29540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /*((32 - fract)* pu1_ref[ref_main_idx]+ fract * pu1_ref[ref_main_idx + 1] + 16) >>5*/ 29550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values0 = _mm_srai_epi16(src_values0, 5); 29560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values1 = _mm_srai_epi16(src_values1, 5); 29570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values2 = _mm_srai_epi16(src_values2, 5); 29580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values3 = _mm_srai_epi16(src_values3, 5); 29590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values4 = _mm_srai_epi16(src_values4, 5); 29600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values5 = _mm_srai_epi16(src_values5, 5); 29610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values6 = _mm_srai_epi16(src_values6, 5); 29620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values7 = _mm_srai_epi16(src_values7, 5); 29630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 29640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* converting 16 bit to 8 bit */ 29650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values0 = 
_mm_packus_epi16(src_values0, src_values4); 29660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values1 = _mm_packus_epi16(src_values1, src_values5); 29670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values2 = _mm_packus_epi16(src_values2, src_values6); 29680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values3 = _mm_packus_epi16(src_values3, src_values7); 29690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 29700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* loading 8-bit 8 pixels values */ 29710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + col + (0) * dst_strd), src_values0); /* row=0*/ 29720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + col + (1) * dst_strd), src_values1); /* row=1*/ 29730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + col + (2) * dst_strd), src_values2); /* row=2*/ 29740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + col + (3) * dst_strd), src_values3); /* row=3*/ 29750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 29760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 29770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values0 = _mm_loadu_si128((__m128i *)(ref_main + ref_main_idx[4] + col)); 29780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values1 = _mm_loadu_si128((__m128i *)(ref_main + ref_main_idx[5] + col)); 29790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values2 = _mm_loadu_si128((__m128i *)(ref_main + ref_main_idx[6] + col)); 29800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values3 = _mm_loadu_si128((__m128i *)(ref_main + ref_main_idx[7] + col)); 29810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values4 = _mm_loadu_si128((__m128i *)(ref_main + ref_main_idx[4] + 8 + 
col)); 29820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values5 = _mm_loadu_si128((__m128i *)(ref_main + ref_main_idx[5] + 8 + col)); 29830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values6 = _mm_loadu_si128((__m128i *)(ref_main + ref_main_idx[6] + 8 + col)); 29840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values7 = _mm_loadu_si128((__m128i *)(ref_main + ref_main_idx[7] + 8 + col)); 29850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 29860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values0 = _mm_shuffle_epi8(src_values0, sm3); 29870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values1 = _mm_shuffle_epi8(src_values1, sm3); 29880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values2 = _mm_shuffle_epi8(src_values2, sm3); 29890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values3 = _mm_shuffle_epi8(src_values3, sm3); 29900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values4 = _mm_shuffle_epi8(src_values4, sm3); 29910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values5 = _mm_shuffle_epi8(src_values5, sm3); 29920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values6 = _mm_shuffle_epi8(src_values6, sm3); 29930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values7 = _mm_shuffle_epi8(src_values7, sm3); 29940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 29950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 29960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values0 = _mm_maddubs_epi16(src_values0, temp11); 29970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values1 = _mm_maddubs_epi16(src_values1, temp12); 29980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values2 = _mm_maddubs_epi16(src_values2, temp13); 29990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values3 = _mm_maddubs_epi16(src_values3, 
temp14); 30000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values4 = _mm_maddubs_epi16(src_values4, temp11); 30010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values5 = _mm_maddubs_epi16(src_values5, temp12); 30020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values6 = _mm_maddubs_epi16(src_values6, temp13); 30030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values7 = _mm_maddubs_epi16(src_values7, temp14); 30040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 30050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /*((32 - fract)* pu1_ref[ref_main_idx]+ fract * pu1_ref[ref_main_idx + 1] + 16)*/ 30060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values0 = _mm_add_epi16(src_values0, const_temp_8x16b); 30070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values1 = _mm_add_epi16(src_values1, const_temp_8x16b); 30080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values2 = _mm_add_epi16(src_values2, const_temp_8x16b); 30090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values3 = _mm_add_epi16(src_values3, const_temp_8x16b); 30100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values4 = _mm_add_epi16(src_values4, const_temp_8x16b); 30110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values5 = _mm_add_epi16(src_values5, const_temp_8x16b); 30120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values6 = _mm_add_epi16(src_values6, const_temp_8x16b); 30130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values7 = _mm_add_epi16(src_values7, const_temp_8x16b); 30140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 30150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /*((32 - fract)* pu1_ref[ref_main_idx]+ fract * pu1_ref[ref_main_idx + 1] + 16) >>5*/ 30160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values0 = _mm_srai_epi16(src_values0, 5); 
30170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values1 = _mm_srai_epi16(src_values1, 5); 30180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values2 = _mm_srai_epi16(src_values2, 5); 30190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values3 = _mm_srai_epi16(src_values3, 5); 30200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values4 = _mm_srai_epi16(src_values4, 5); 30210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values5 = _mm_srai_epi16(src_values5, 5); 30220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values6 = _mm_srai_epi16(src_values6, 5); 30230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values7 = _mm_srai_epi16(src_values7, 5); 30240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 30250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* converting 16 bit to 8 bit */ 30260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values0 = _mm_packus_epi16(src_values0, src_values4); 30270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values1 = _mm_packus_epi16(src_values1, src_values5); 30280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values2 = _mm_packus_epi16(src_values2, src_values6); 30290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values3 = _mm_packus_epi16(src_values3, src_values7); 30300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 30310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* loading 8-bit 8 pixels values */ 30320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + col + (4) * dst_strd), src_values0); /* row=4*/ 30330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + col + (5) * dst_strd), src_values1); /* row=5*/ 30340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + col + (6) * dst_strd), 
src_values2); /* row=6*/ 30350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + col + (7) * dst_strd), src_values3); /* row=7*/ 30360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 30370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 30380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pu1_dst += 8 * dst_strd; 30390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 30400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 30410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 30420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar else if(nt == 16) /* for nt = 16 case */ 30430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 30440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 30450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i const_temp2_4x32b, const_temp3_4x32b, const_temp8_4x32b; 30460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i src_values10, src_values11, intra_pred_ang_4x32b; 30470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i row_4x32b, two_nt_4x32b, src_values12; 30480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i src_values0, src_values1, src_values2, src_values3; 30490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i src_values4, src_values5, src_values6, src_values7; 30500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 30510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 30520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* Intermediate reference samples for negative angle modes */ 30530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* For horizontal modes, (ref main = ref above) (ref side = ref left) */ 30540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ref_temp[two_nt - 1] = pu1_ref[two_nt + nt]; 30550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp1 = 
_mm_loadu_si128((__m128i *)(pu1_ref + two_nt)); 30560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 30570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values0 = _mm_loadu_si128((__m128i *)(pu1_ref + two_nt - nt)); /*nt-(nt+15)*/ 30580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 30590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp11 = _mm_loadu_si128((__m128i *)(inv_angle_shuffle[mode - 19] + 16)); 30600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 30610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values0 = _mm_shuffle_epi8(src_values0, temp11); 30620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 30630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(ref_main - nt), src_values0); 30640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(ref_temp + nt - 1), temp1); 30650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 30660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar const_temp2_4x32b = _mm_set1_epi16(31); 30670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar const_temp3_4x32b = _mm_set1_epi16(32); 30680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar const_temp8_4x32b = _mm_set1_epi16(8); 30690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 30700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar two_nt_4x32b = _mm_set1_epi16(1); 30710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 30720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* intra_pred_ang = gai4_ihevc_ang_table[mode]; */ 30730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar intra_pred_ang_4x32b = _mm_set1_epi16(intra_pred_ang); 30740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 30750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar row_4x32b = _mm_set_epi16(8, 7, 6, 5, 4, 3, 2, 1); 30760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish 
Mahendrakar 30770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar for(row = 0; row < nt; row += 8) 30780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 30790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 30800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD16 ref_main_idx[9]; 30810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 30820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i res_temp5_4x32b; 30830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i fract1_8x16b, fract2_8x16b; 30840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 30850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* pos = ((row + 1) * intra_pred_ang); */ 30860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar res_temp5_4x32b = _mm_mullo_epi16(row_4x32b, intra_pred_ang_4x32b); 30870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 30880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* fract = pos & (31); */ 30890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values12 = _mm_add_epi16(two_nt_4x32b, _mm_srai_epi16(res_temp5_4x32b, 5)); 30900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 30910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* idx = pos >> 5; */ 30920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values11 = _mm_and_si128(res_temp5_4x32b, const_temp2_4x32b); 30930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 30940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /*(32 - fract) */ 30950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values10 = _mm_sub_epi16(const_temp3_4x32b, src_values11); 30960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 30970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar fract1_8x16b = _mm_slli_epi16(src_values11, 8); 30980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar fract2_8x16b = _mm_slli_epi16(src_values10, 8); 
30990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 31000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values11 = _mm_or_si128(src_values11, fract1_8x16b); 31010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values10 = _mm_or_si128(src_values10, fract2_8x16b); /*(32 - fract) */ 31020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 31030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar fract2_8x16b = _mm_unpackhi_epi8(src_values11, src_values10); 31040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar fract1_8x16b = _mm_unpacklo_epi8(src_values11, src_values10); 31050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 31060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp1 = _mm_shuffle_epi32(fract1_8x16b, 0x00); 31070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp2 = _mm_shuffle_epi32(fract1_8x16b, 0x55); 31080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp3 = _mm_shuffle_epi32(fract1_8x16b, 0xaa); 31090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp4 = _mm_shuffle_epi32(fract1_8x16b, 0xff); 31100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 31110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp11 = _mm_shuffle_epi32(fract2_8x16b, 0x00); 31120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp12 = _mm_shuffle_epi32(fract2_8x16b, 0x55); 31130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp13 = _mm_shuffle_epi32(fract2_8x16b, 0xaa); 31140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp14 = _mm_shuffle_epi32(fract2_8x16b, 0xff); 31150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 31160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar row_4x32b = _mm_add_epi16(row_4x32b, const_temp8_4x32b); 31170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)ref_main_idx, src_values12); 31180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish 
Mahendrakar 31190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 31200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values0 = _mm_loadu_si128((__m128i *)(ref_main + ref_main_idx[0])); 31210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values1 = _mm_loadu_si128((__m128i *)(ref_main + ref_main_idx[1])); 31220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values2 = _mm_loadu_si128((__m128i *)(ref_main + ref_main_idx[2])); 31230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values3 = _mm_loadu_si128((__m128i *)(ref_main + ref_main_idx[3])); 31240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values4 = _mm_loadu_si128((__m128i *)(ref_main + ref_main_idx[0] + 8)); 31250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values5 = _mm_loadu_si128((__m128i *)(ref_main + ref_main_idx[1] + 8)); 31260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values6 = _mm_loadu_si128((__m128i *)(ref_main + ref_main_idx[2] + 8)); 31270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values7 = _mm_loadu_si128((__m128i *)(ref_main + ref_main_idx[3] + 8)); 31280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 31290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values0 = _mm_shuffle_epi8(src_values0, sm3); 31300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values1 = _mm_shuffle_epi8(src_values1, sm3); 31310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values2 = _mm_shuffle_epi8(src_values2, sm3); 31320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values3 = _mm_shuffle_epi8(src_values3, sm3); 31330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values4 = _mm_shuffle_epi8(src_values4, sm3); 31340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values5 = _mm_shuffle_epi8(src_values5, sm3); 31350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values6 = 
_mm_shuffle_epi8(src_values6, sm3); 31360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values7 = _mm_shuffle_epi8(src_values7, sm3); 31370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 31380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 31390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values0 = _mm_maddubs_epi16(src_values0, temp1); 31400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values1 = _mm_maddubs_epi16(src_values1, temp2); 31410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values2 = _mm_maddubs_epi16(src_values2, temp3); 31420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values3 = _mm_maddubs_epi16(src_values3, temp4); 31430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values4 = _mm_maddubs_epi16(src_values4, temp1); 31440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values5 = _mm_maddubs_epi16(src_values5, temp2); 31450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values6 = _mm_maddubs_epi16(src_values6, temp3); 31460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values7 = _mm_maddubs_epi16(src_values7, temp4); 31470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 31480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /*((32 - fract)* pu1_ref[ref_main_idx]+ fract * pu1_ref[ref_main_idx + 1] + 16)*/ 31490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values0 = _mm_add_epi16(src_values0, const_temp_8x16b); 31500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values1 = _mm_add_epi16(src_values1, const_temp_8x16b); 31510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values2 = _mm_add_epi16(src_values2, const_temp_8x16b); 31520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values3 = _mm_add_epi16(src_values3, const_temp_8x16b); 31530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values4 = 
_mm_add_epi16(src_values4, const_temp_8x16b); 31540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values5 = _mm_add_epi16(src_values5, const_temp_8x16b); 31550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values6 = _mm_add_epi16(src_values6, const_temp_8x16b); 31560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values7 = _mm_add_epi16(src_values7, const_temp_8x16b); 31570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 31580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /*((32 - fract)* pu1_ref[ref_main_idx]+ fract * pu1_ref[ref_main_idx + 1] + 16) >>5*/ 31590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values0 = _mm_srai_epi16(src_values0, 5); 31600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values1 = _mm_srai_epi16(src_values1, 5); 31610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values2 = _mm_srai_epi16(src_values2, 5); 31620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values3 = _mm_srai_epi16(src_values3, 5); 31630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values4 = _mm_srai_epi16(src_values4, 5); 31640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values5 = _mm_srai_epi16(src_values5, 5); 31650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values6 = _mm_srai_epi16(src_values6, 5); 31660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values7 = _mm_srai_epi16(src_values7, 5); 31670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 31680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* converting 16 bit to 8 bit */ 31690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values0 = _mm_packus_epi16(src_values0, src_values4); 31700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values1 = _mm_packus_epi16(src_values1, src_values5); 31710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values2 = 
_mm_packus_epi16(src_values2, src_values6); 31720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values3 = _mm_packus_epi16(src_values3, src_values7); 31730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 31740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* loading 8-bit 8 pixels values */ 31750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + (0) * dst_strd), src_values0); /* row=0*/ 31760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + (1) * dst_strd), src_values1); /* row=1*/ 31770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + (2) * dst_strd), src_values2); /* row=2*/ 31780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + (3) * dst_strd), src_values3); /* row=3*/ 31790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 31800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 31810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values0 = _mm_loadu_si128((__m128i *)(ref_main + ref_main_idx[4])); 31820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values1 = _mm_loadu_si128((__m128i *)(ref_main + ref_main_idx[5])); 31830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values2 = _mm_loadu_si128((__m128i *)(ref_main + ref_main_idx[6])); 31840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values3 = _mm_loadu_si128((__m128i *)(ref_main + ref_main_idx[7])); 31850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values4 = _mm_loadu_si128((__m128i *)(ref_main + ref_main_idx[4] + 8)); 31860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values5 = _mm_loadu_si128((__m128i *)(ref_main + ref_main_idx[5] + 8)); 31870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values6 = _mm_loadu_si128((__m128i *)(ref_main + ref_main_idx[6] + 8)); 
31880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values7 = _mm_loadu_si128((__m128i *)(ref_main + ref_main_idx[7] + 8)); 31890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 31900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values0 = _mm_shuffle_epi8(src_values0, sm3); 31910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values1 = _mm_shuffle_epi8(src_values1, sm3); 31920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values2 = _mm_shuffle_epi8(src_values2, sm3); 31930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values3 = _mm_shuffle_epi8(src_values3, sm3); 31940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values4 = _mm_shuffle_epi8(src_values4, sm3); 31950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values5 = _mm_shuffle_epi8(src_values5, sm3); 31960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values6 = _mm_shuffle_epi8(src_values6, sm3); 31970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values7 = _mm_shuffle_epi8(src_values7, sm3); 31980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 31990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 32000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values0 = _mm_maddubs_epi16(src_values0, temp11); 32010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values1 = _mm_maddubs_epi16(src_values1, temp12); 32020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values2 = _mm_maddubs_epi16(src_values2, temp13); 32030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values3 = _mm_maddubs_epi16(src_values3, temp14); 32040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values4 = _mm_maddubs_epi16(src_values4, temp11); 32050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values5 = _mm_maddubs_epi16(src_values5, temp12); 32060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 
src_values6 = _mm_maddubs_epi16(src_values6, temp13); 32070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values7 = _mm_maddubs_epi16(src_values7, temp14); 32080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 32090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /*((32 - fract)* pu1_ref[ref_main_idx]+ fract * pu1_ref[ref_main_idx + 1] + 16)*/ 32100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values0 = _mm_add_epi16(src_values0, const_temp_8x16b); 32110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values1 = _mm_add_epi16(src_values1, const_temp_8x16b); 32120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values2 = _mm_add_epi16(src_values2, const_temp_8x16b); 32130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values3 = _mm_add_epi16(src_values3, const_temp_8x16b); 32140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values4 = _mm_add_epi16(src_values4, const_temp_8x16b); 32150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values5 = _mm_add_epi16(src_values5, const_temp_8x16b); 32160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values6 = _mm_add_epi16(src_values6, const_temp_8x16b); 32170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values7 = _mm_add_epi16(src_values7, const_temp_8x16b); 32180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 32190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /*((32 - fract)* pu1_ref[ref_main_idx]+ fract * pu1_ref[ref_main_idx + 1] + 16) >>5*/ 32200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values0 = _mm_srai_epi16(src_values0, 5); 32210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values1 = _mm_srai_epi16(src_values1, 5); 32220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values2 = _mm_srai_epi16(src_values2, 5); 32230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values3 = 
_mm_srai_epi16(src_values3, 5); 32240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values4 = _mm_srai_epi16(src_values4, 5); 32250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values5 = _mm_srai_epi16(src_values5, 5); 32260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values6 = _mm_srai_epi16(src_values6, 5); 32270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values7 = _mm_srai_epi16(src_values7, 5); 32280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 32290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* converting 16 bit to 8 bit */ 32300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values0 = _mm_packus_epi16(src_values0, src_values4); 32310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values1 = _mm_packus_epi16(src_values1, src_values5); 32320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values2 = _mm_packus_epi16(src_values2, src_values6); 32330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values3 = _mm_packus_epi16(src_values3, src_values7); 32340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 32350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* loading 8-bit 8 pixels values */ 32360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + (4) * dst_strd), src_values0); /* row=4*/ 32370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + (5) * dst_strd), src_values1); /* row=5*/ 32380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + (6) * dst_strd), src_values2); /* row=6*/ 32390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + (7) * dst_strd), src_values3); /* row=7*/ 32400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 32410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 
32420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pu1_dst += 8 * dst_strd; 32430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 32440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 32450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar else if(nt == 8) 32460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 32470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 32480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 32490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i const_temp2_4x32b, const_temp3_4x32b; 32500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i src_values10, src_values11, intra_pred_ang_4x32b; 32510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 32520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i row_4x32b, two_nt_4x32b, src_values12; 32530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i src_values0, src_values1, src_values2, src_values3; 32540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i src_values4, src_values5, src_values6, src_values7; 32550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 32560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 32570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* Intermediate reference samples for negative angle modes */ 32580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* For horizontal modes, (ref main = ref above) (ref side = ref left) */ 32590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ref_temp[two_nt - 1] = pu1_ref[two_nt + nt]; 32600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp1 = _mm_loadl_epi64((__m128i *)(pu1_ref + two_nt)); 32610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 32620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* For negative angled derive the main reference samples from side */ 32630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish 
Mahendrakar 32640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values0 = _mm_loadu_si128((__m128i *)(pu1_ref)); /*nt-(nt+15)*/ 32650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 32660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp11 = _mm_loadu_si128((__m128i *)(inv_angle_shuffle[mode - 19] + 16)); 32670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 32680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values0 = _mm_shuffle_epi8(src_values0, temp11); 32690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values0 = _mm_srli_si128(src_values0, 8); 32700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storel_epi64((__m128i *)(ref_temp + nt - 1), temp1); 32710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storel_epi64((__m128i *)(ref_main - nt), src_values0); 32720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 32730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 32740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 32750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar const_temp2_4x32b = _mm_set1_epi16(31); 32760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar const_temp3_4x32b = _mm_set1_epi16(32); 32770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 32780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 32790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar two_nt_4x32b = _mm_set1_epi16(1); 32800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 32810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 32820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* intra_pred_ang = gai4_ihevc_ang_table[mode]; */ 32830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar intra_pred_ang_4x32b = _mm_set1_epi16(intra_pred_ang); 32840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 32850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar row_4x32b = 
_mm_set_epi16(8, 7, 6, 5, 4, 3, 2, 1); 32860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 32870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 32880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 32890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD16 ref_main_idx[9]; 32900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 32910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i res_temp5_4x32b; 32920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i fract1_8x16b, fract2_8x16b; 32930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 32940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* pos = ((row + 1) * intra_pred_ang); */ 32950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar res_temp5_4x32b = _mm_mullo_epi16(row_4x32b, intra_pred_ang_4x32b); 32960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 32970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* fract = pos & (31); */ 32980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values12 = _mm_add_epi16(two_nt_4x32b, _mm_srai_epi16(res_temp5_4x32b, 5)); 32990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 33000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* idx = pos >> 5; */ 33010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values11 = _mm_and_si128(res_temp5_4x32b, const_temp2_4x32b); 33020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 33030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /*(32 - fract) */ 33040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values10 = _mm_sub_epi16(const_temp3_4x32b, src_values11); 33050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 33060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar fract1_8x16b = _mm_slli_epi16(src_values11, 8); 33070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar fract2_8x16b = _mm_slli_epi16(src_values10, 8); 
33080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 33090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values11 = _mm_or_si128(src_values11, fract1_8x16b); 33100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values10 = _mm_or_si128(src_values10, fract2_8x16b); /*(32 - fract) */ 33110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 33120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar fract2_8x16b = _mm_unpackhi_epi8(src_values11, src_values10); 33130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar fract1_8x16b = _mm_unpacklo_epi8(src_values11, src_values10); 33140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 33150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp1 = _mm_shuffle_epi32(fract1_8x16b, 0x00); 33160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp2 = _mm_shuffle_epi32(fract1_8x16b, 0x55); 33170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp3 = _mm_shuffle_epi32(fract1_8x16b, 0xaa); 33180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp4 = _mm_shuffle_epi32(fract1_8x16b, 0xff); 33190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 33200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp11 = _mm_shuffle_epi32(fract2_8x16b, 0x00); 33210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp12 = _mm_shuffle_epi32(fract2_8x16b, 0x55); 33220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp13 = _mm_shuffle_epi32(fract2_8x16b, 0xaa); 33230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp14 = _mm_shuffle_epi32(fract2_8x16b, 0xff); 33240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 33250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)ref_main_idx, src_values12); 33260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 33270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values0 = _mm_loadu_si128((__m128i 
*)(ref_main + ref_main_idx[0])); /* col = 0-7 */ 33280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values1 = _mm_loadu_si128((__m128i *)(ref_main + ref_main_idx[1])); /* col = 8-15 */ 33290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values2 = _mm_loadu_si128((__m128i *)(ref_main + ref_main_idx[2])); /* col = 16-23 */ 33300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values3 = _mm_loadu_si128((__m128i *)(ref_main + ref_main_idx[3])); /* col = 24-31 */ 33310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values4 = _mm_loadu_si128((__m128i *)(ref_main + ref_main_idx[4])); /* col = 32-39 */ 33320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values5 = _mm_loadu_si128((__m128i *)(ref_main + ref_main_idx[5])); /* col = 40-47 */ 33330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values6 = _mm_loadu_si128((__m128i *)(ref_main + ref_main_idx[6])); /* col = 48-55 */ 33340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values7 = _mm_loadu_si128((__m128i *)(ref_main + ref_main_idx[7])); /* col = 56-63*/ 33350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 33360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values0 = _mm_shuffle_epi8(src_values0, sm3); 33370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values1 = _mm_shuffle_epi8(src_values1, sm3); 33380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values2 = _mm_shuffle_epi8(src_values2, sm3); 33390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values3 = _mm_shuffle_epi8(src_values3, sm3); 33400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values4 = _mm_shuffle_epi8(src_values4, sm3); 33410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values5 = _mm_shuffle_epi8(src_values5, sm3); 33420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values6 = _mm_shuffle_epi8(src_values6, sm3); 
33430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values7 = _mm_shuffle_epi8(src_values7, sm3); 33440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 33450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 33460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values0 = _mm_maddubs_epi16(src_values0, temp1); 33470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values1 = _mm_maddubs_epi16(src_values1, temp2); 33480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values2 = _mm_maddubs_epi16(src_values2, temp3); 33490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values3 = _mm_maddubs_epi16(src_values3, temp4); 33500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values4 = _mm_maddubs_epi16(src_values4, temp11); 33510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values5 = _mm_maddubs_epi16(src_values5, temp12); 33520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values6 = _mm_maddubs_epi16(src_values6, temp13); 33530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values7 = _mm_maddubs_epi16(src_values7, temp14); 33540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 33550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /*((32 - fract)* pu1_ref[ref_main_idx]+ fract * pu1_ref[ref_main_idx + 1] + 16)*/ 33560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values0 = _mm_add_epi16(src_values0, const_temp_8x16b); 33570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values1 = _mm_add_epi16(src_values1, const_temp_8x16b); 33580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values2 = _mm_add_epi16(src_values2, const_temp_8x16b); 33590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values3 = _mm_add_epi16(src_values3, const_temp_8x16b); 33600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values4 = _mm_add_epi16(src_values4, const_temp_8x16b); 
33610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values5 = _mm_add_epi16(src_values5, const_temp_8x16b); 33620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values6 = _mm_add_epi16(src_values6, const_temp_8x16b); 33630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values7 = _mm_add_epi16(src_values7, const_temp_8x16b); 33640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 33650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /*((32 - fract)* pu1_ref[ref_main_idx]+ fract * pu1_ref[ref_main_idx + 1] + 16) >>5*/ 33660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values0 = _mm_srai_epi16(src_values0, 5); 33670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values1 = _mm_srai_epi16(src_values1, 5); 33680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values2 = _mm_srai_epi16(src_values2, 5); 33690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values3 = _mm_srai_epi16(src_values3, 5); 33700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values4 = _mm_srai_epi16(src_values4, 5); 33710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values5 = _mm_srai_epi16(src_values5, 5); 33720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values6 = _mm_srai_epi16(src_values6, 5); 33730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values7 = _mm_srai_epi16(src_values7, 5); 33740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 33750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* converting 16 bit to 8 bit */ 33760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values0 = _mm_packus_epi16(src_values0, src_values1); 33770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values2 = _mm_packus_epi16(src_values2, src_values3); 33780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values1 = _mm_srli_si128(src_values0, 8); 
33790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values3 = _mm_srli_si128(src_values2, 8); 33800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values4 = _mm_packus_epi16(src_values4, src_values5); 33810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values6 = _mm_packus_epi16(src_values6, src_values7); 33820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values5 = _mm_srli_si128(src_values4, 8); 33830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values7 = _mm_srli_si128(src_values6, 8); 33840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 33850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* loading 8-bit 8 pixels values */ 33860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storel_epi64((__m128i *)(pu1_dst + 0 * dst_strd), src_values0); /* row=0*/ 33870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storel_epi64((__m128i *)(pu1_dst + 1 * dst_strd), src_values1); /* row=1*/ 33880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storel_epi64((__m128i *)(pu1_dst + 2 * dst_strd), src_values2); /* row=2*/ 33890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storel_epi64((__m128i *)(pu1_dst + 3 * dst_strd), src_values3); /* row=3*/ 33900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storel_epi64((__m128i *)(pu1_dst + 4 * dst_strd), src_values4); /* row=4*/ 33910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storel_epi64((__m128i *)(pu1_dst + 5 * dst_strd), src_values5); /* row=5*/ 33920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storel_epi64((__m128i *)(pu1_dst + 6 * dst_strd), src_values6); /* row=6*/ 33930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storel_epi64((__m128i *)(pu1_dst + 7 * dst_strd), src_values7); /* row=7*/ 33940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 33950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 
33960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar else /* if nt =4*/ 33970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 33980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 33990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i const_temp2_4x32b, const_temp3_4x32b; 34000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i src_values10, src_values11, intra_pred_ang_4x32b; 34010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 34020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i row_4x32b, two_nt_4x32b, src_values12; 34030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 34040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 34050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar for(k = 0; k < (nt + 1); k++) 34060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ref_temp[k + nt - 1] = pu1_ref[two_nt + k]; 34070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ref_idx = (nt * intra_pred_ang) >> 5; 34080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar inv_ang_sum = 128; 34090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 34100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar for(k = -1; k > ref_idx; k--) 34110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 34120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar inv_ang_sum += inv_ang; 34130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ref_main[k] = pu1_ref[two_nt - (inv_ang_sum >> 8)]; 34140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 34150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 34160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 34170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar const_temp2_4x32b = _mm_set1_epi32(31); 34180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar const_temp3_4x32b = _mm_set1_epi32(32); 34190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish 
Mahendrakar 34200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar two_nt_4x32b = _mm_set1_epi32(1); 34210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 34220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 34230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* intra_pred_ang = gai4_ihevc_ang_table[mode]; */ 34240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar intra_pred_ang_4x32b = _mm_set1_epi32(intra_pred_ang); 34250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 34260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar row_4x32b = _mm_set_epi32(4, 3, 2, 1); 34270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 34280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 ref_main_idx1, ref_main_idx2, ref_main_idx3, ref_main_idx4; 34290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar int temp11, temp21, temp31, temp41; 34300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 34310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 34320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i fract1_8x16b, fract2_8x16b, res_temp5_4x32b; 34330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i src_values0, src_values1, src_values2, src_values3; 34340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i ref_main_temp0, ref_main_temp1, ref_main_temp2; 34350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 34360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* pos = ((row + 1) * intra_pred_ang); */ 34370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar res_temp5_4x32b = _mm_mullo_epi32(row_4x32b, intra_pred_ang_4x32b); 34380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 34390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* fract = pos & (31); */ 34400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values12 = _mm_add_epi32(two_nt_4x32b, _mm_srai_epi32(res_temp5_4x32b, 
5)); 34410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 34420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ref_main_temp0 = _mm_srli_si128(src_values12, 4); /* next 32 bit values */ 34430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ref_main_temp1 = _mm_srli_si128(src_values12, 8); /* next 32 bit values */ 34440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ref_main_temp2 = _mm_srli_si128(src_values12, 12); /* next 32 bit values */ 34450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ref_main_idx1 = _mm_cvtsi128_si32(src_values12); /* row=0*/ 34460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ref_main_idx2 = _mm_cvtsi128_si32(ref_main_temp0); /* row=1*/ 34470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ref_main_idx3 = _mm_cvtsi128_si32(ref_main_temp1); /* row=2*/ 34480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ref_main_idx4 = _mm_cvtsi128_si32(ref_main_temp2); /* row=3*/ 34490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 34500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* idx = pos >> 5; */ 34510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values11 = _mm_and_si128(res_temp5_4x32b, const_temp2_4x32b); 34520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 34530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /*(32 - fract) */ 34540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values10 = _mm_sub_epi32(const_temp3_4x32b, src_values11); 34550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 34560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar fract1_8x16b = _mm_slli_epi16(src_values11, 8); 34570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar fract2_8x16b = _mm_slli_epi16(src_values10, 8); 34580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 34590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values11 = _mm_or_si128(src_values11, fract1_8x16b); 
34600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values10 = _mm_or_si128(src_values10, fract2_8x16b); /*(32 - fract) */ 34610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 34620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar fract2_8x16b = _mm_unpackhi_epi8(src_values11, src_values10); 34630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar fract1_8x16b = _mm_unpacklo_epi8(src_values11, src_values10); 34640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 34650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp1 = _mm_shuffle_epi32(fract1_8x16b, 0x00); 34660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp2 = _mm_shuffle_epi32(fract1_8x16b, 0xaa); 34670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp3 = _mm_shuffle_epi32(fract2_8x16b, 0x00); 34680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp4 = _mm_shuffle_epi32(fract2_8x16b, 0xaa); 34690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 34700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values0 = _mm_loadu_si128((__m128i *)(ref_main + ref_main_idx1)); /* col = 0-7 */ 34710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values1 = _mm_loadu_si128((__m128i *)(ref_main + ref_main_idx2)); /* col = 8-15 */ 34720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values2 = _mm_loadu_si128((__m128i *)(ref_main + ref_main_idx3)); /* col = 16-23 */ 34730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values3 = _mm_loadu_si128((__m128i *)(ref_main + ref_main_idx4)); /* col = 24-31 */ 34740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 34750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values0 = _mm_shuffle_epi8(src_values0, sm3); 34760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values1 = _mm_shuffle_epi8(src_values1, sm3); 34770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values2 = 
_mm_shuffle_epi8(src_values2, sm3); 34780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values3 = _mm_shuffle_epi8(src_values3, sm3); 34790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 34800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 34810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values0 = _mm_maddubs_epi16(src_values0, temp1); 34820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values1 = _mm_maddubs_epi16(src_values1, temp2); 34830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values2 = _mm_maddubs_epi16(src_values2, temp3); 34840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values3 = _mm_maddubs_epi16(src_values3, temp4); 34850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 34860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /*((32 - fract)* pu1_ref[ref_main_idx]+ fract * pu1_ref[ref_main_idx + 1] + 16)*/ 34870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values0 = _mm_add_epi16(src_values0, const_temp_8x16b); 34880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values1 = _mm_add_epi16(src_values1, const_temp_8x16b); 34890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values2 = _mm_add_epi16(src_values2, const_temp_8x16b); 34900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values3 = _mm_add_epi16(src_values3, const_temp_8x16b); 34910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 34920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /*((32 - fract)* pu1_ref[ref_main_idx]+ fract * pu1_ref[ref_main_idx + 1] + 16) >>5*/ 34930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values0 = _mm_srai_epi16(src_values0, 5); 34940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values1 = _mm_srai_epi16(src_values1, 5); 34950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values2 = _mm_srai_epi16(src_values2, 5); 
34960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values3 = _mm_srai_epi16(src_values3, 5); 34970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 34980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* converting 16 bit to 8 bit */ 34990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values0 = _mm_packus_epi16(src_values0, src_values1); 35000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values2 = _mm_packus_epi16(src_values2, src_values3); 35010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values1 = _mm_srli_si128(src_values0, 8); 35020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values3 = _mm_srli_si128(src_values2, 8); 35030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 35040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp11 = _mm_cvtsi128_si32(src_values0); 35050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp21 = _mm_cvtsi128_si32(src_values1); 35060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp31 = _mm_cvtsi128_si32(src_values2); 35070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp41 = _mm_cvtsi128_si32(src_values3); 35080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 35090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* loding 4-bit 8 pixels values */ 35100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar *(WORD32 *)(&pu1_dst[(0 * dst_strd)]) = temp11; 35110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar *(WORD32 *)(&pu1_dst[(1 * dst_strd)]) = temp21; 35120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar *(WORD32 *)(&pu1_dst[(2 * dst_strd)]) = temp31; 35130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar *(WORD32 *)(&pu1_dst[(3 * dst_strd)]) = temp41; 35140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 35150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 35160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish 
Mahendrakar } 35170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar} 35180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 35190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 35200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar/** 35210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar******************************************************************************* 35220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 35230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @brief 35240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* Intra prediction interpolation filter for luma mode 27 to mode 33 35250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 35260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @par Description: 35270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* Intra prediction for mode 27 to 33 (positive angle, vertical mode) with 35280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* reference neighboring samples location pointed by 'pu1_ref' to the TU 35290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* block location pointed by 'pu1_dst' 35300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 35310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] pu1_ref 35320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* UWORD8 pointer to the source 35330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 35340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[out] pu1_dst 35350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* UWORD8 pointer to the destination 35360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 35370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] src_strd 35380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* integer source stride 35390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 
35400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] dst_strd 35410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* integer destination stride 35420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 35430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] nt 35440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* integer Transform Block size 35450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 35460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] mode 35470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* integer intraprediction mode 35480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 35490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @returns 35500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 35510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @remarks 35520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* None 35530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 35540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar******************************************************************************* 35550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar*/ 35560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 35570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 35580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarvoid ihevc_intra_pred_luma_mode_27_to_33_sse42(UWORD8 *pu1_ref, 35590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 src_strd, 35600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar UWORD8 *pu1_dst, 35610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 dst_strd, 35620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 nt, 35630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 mode) 35640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar{ 
35650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 row; 35660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 two_nt; 35670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 intra_pred_ang; 35680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 35690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i temp11, temp12, temp13, temp14; 35700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 35710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i const_temp_8x16b; 35720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i temp1, temp2, temp3, temp4, sm3; 35730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar UNUSED(src_strd); 35740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 35750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar two_nt = 2 * nt; 35760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar intra_pred_ang = gai4_ihevc_ang_table[mode]; 35770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 35780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar const_temp_8x16b = _mm_set1_epi16(16); 35790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar sm3 = _mm_load_si128((__m128i *)&IHEVCE_SHUFFLEMASKY11[0]); 35800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar if(nt == 32) 35810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 35820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 35830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i const_temp2_4x32b, const_temp3_4x32b, const_temp8_4x32b; 35840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i src_values10, src_values11, intra_pred_ang_4x32b; 35850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i row_4x32b, two_nt_4x32b, src_values12; 35860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar int col = 0; 35870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 
35880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar const_temp2_4x32b = _mm_set1_epi16(31); 35890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar const_temp3_4x32b = _mm_set1_epi16(32); 35900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar const_temp8_4x32b = _mm_set1_epi16(8); 35910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 35920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar two_nt_4x32b = _mm_set1_epi16(two_nt + 1); 35930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 35940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* intra_pred_ang = gai4_ihevc_ang_table[mode]; */ 35950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar intra_pred_ang_4x32b = _mm_set1_epi16(intra_pred_ang); 35960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 35970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar row_4x32b = _mm_set_epi16(8, 7, 6, 5, 4, 3, 2, 1); 35980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 35990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar for(row = 0; row < nt; row += 8) 36000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 36010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 36020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD16 ref_main_idx[9]; 36030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 36040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i res_temp5_4x32b; 36050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i fract1_8x16b, fract2_8x16b; 36060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i src_values0, src_values1, src_values2, src_values3; 36070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i src_values4, src_values5, src_values6, src_values7; 36080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 36090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* pos = ((row + 1) * intra_pred_ang); */ 
36100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar res_temp5_4x32b = _mm_mullo_epi16(row_4x32b, intra_pred_ang_4x32b); 36110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 36120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* fract = pos & (31); */ 36130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values12 = _mm_add_epi16(two_nt_4x32b, _mm_srai_epi16(res_temp5_4x32b, 5)); 36140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 36150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* idx = pos >> 5; */ 36160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values11 = _mm_and_si128(res_temp5_4x32b, const_temp2_4x32b); 36170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 36180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /*(32 - fract) */ 36190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values10 = _mm_sub_epi16(const_temp3_4x32b, src_values11); 36200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 36210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar fract1_8x16b = _mm_slli_epi16(src_values11, 8); 36220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar fract2_8x16b = _mm_slli_epi16(src_values10, 8); 36230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 36240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values11 = _mm_or_si128(src_values11, fract1_8x16b); 36250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values10 = _mm_or_si128(src_values10, fract2_8x16b); /*(32 - fract) */ 36260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 36270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar fract2_8x16b = _mm_unpackhi_epi8(src_values11, src_values10); 36280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar fract1_8x16b = _mm_unpacklo_epi8(src_values11, src_values10); 36290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 
36300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp1 = _mm_shuffle_epi32(fract1_8x16b, 0x00); 36310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp2 = _mm_shuffle_epi32(fract1_8x16b, 0x55); 36320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp3 = _mm_shuffle_epi32(fract1_8x16b, 0xaa); 36330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp4 = _mm_shuffle_epi32(fract1_8x16b, 0xff); 36340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 36350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp11 = _mm_shuffle_epi32(fract2_8x16b, 0x00); 36360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp12 = _mm_shuffle_epi32(fract2_8x16b, 0x55); 36370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp13 = _mm_shuffle_epi32(fract2_8x16b, 0xaa); 36380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp14 = _mm_shuffle_epi32(fract2_8x16b, 0xff); 36390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 36400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar row_4x32b = _mm_add_epi16(row_4x32b, const_temp8_4x32b); 36410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)ref_main_idx, src_values12); 36420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar for(col = 0; col < nt; col += 16) 36430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 36440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values0 = _mm_loadu_si128((__m128i *)(pu1_ref + ref_main_idx[0] + col)); 36450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values1 = _mm_loadu_si128((__m128i *)(pu1_ref + ref_main_idx[1] + col)); 36460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values2 = _mm_loadu_si128((__m128i *)(pu1_ref + ref_main_idx[2] + col)); 36470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values3 = _mm_loadu_si128((__m128i *)(pu1_ref + ref_main_idx[3] + col)); 
36480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values4 = _mm_loadu_si128((__m128i *)(pu1_ref + ref_main_idx[0] + 8 + col)); 36490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values5 = _mm_loadu_si128((__m128i *)(pu1_ref + ref_main_idx[1] + 8 + col)); 36500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values6 = _mm_loadu_si128((__m128i *)(pu1_ref + ref_main_idx[2] + 8 + col)); 36510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values7 = _mm_loadu_si128((__m128i *)(pu1_ref + ref_main_idx[3] + 8 + col)); 36520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 36530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values0 = _mm_shuffle_epi8(src_values0, sm3); 36540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values1 = _mm_shuffle_epi8(src_values1, sm3); 36550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values2 = _mm_shuffle_epi8(src_values2, sm3); 36560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values3 = _mm_shuffle_epi8(src_values3, sm3); 36570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values4 = _mm_shuffle_epi8(src_values4, sm3); 36580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values5 = _mm_shuffle_epi8(src_values5, sm3); 36590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values6 = _mm_shuffle_epi8(src_values6, sm3); 36600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values7 = _mm_shuffle_epi8(src_values7, sm3); 36610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 36620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 36630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values0 = _mm_maddubs_epi16(src_values0, temp1); 36640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values1 = _mm_maddubs_epi16(src_values1, temp2); 36650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values2 = 
_mm_maddubs_epi16(src_values2, temp3); 36660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values3 = _mm_maddubs_epi16(src_values3, temp4); 36670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values4 = _mm_maddubs_epi16(src_values4, temp1); 36680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values5 = _mm_maddubs_epi16(src_values5, temp2); 36690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values6 = _mm_maddubs_epi16(src_values6, temp3); 36700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values7 = _mm_maddubs_epi16(src_values7, temp4); 36710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 36720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /*((32 - fract)* pu1_ref[ref_main_idx]+ fract * pu1_ref[ref_main_idx + 1] + 16)*/ 36730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values0 = _mm_add_epi16(src_values0, const_temp_8x16b); 36740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values1 = _mm_add_epi16(src_values1, const_temp_8x16b); 36750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values2 = _mm_add_epi16(src_values2, const_temp_8x16b); 36760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values3 = _mm_add_epi16(src_values3, const_temp_8x16b); 36770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values4 = _mm_add_epi16(src_values4, const_temp_8x16b); 36780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values5 = _mm_add_epi16(src_values5, const_temp_8x16b); 36790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values6 = _mm_add_epi16(src_values6, const_temp_8x16b); 36800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values7 = _mm_add_epi16(src_values7, const_temp_8x16b); 36810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 36820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /*((32 - fract)* pu1_ref[ref_main_idx]+ fract * 
pu1_ref[ref_main_idx + 1] + 16) >>5*/ 36830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values0 = _mm_srai_epi16(src_values0, 5); 36840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values1 = _mm_srai_epi16(src_values1, 5); 36850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values2 = _mm_srai_epi16(src_values2, 5); 36860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values3 = _mm_srai_epi16(src_values3, 5); 36870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values4 = _mm_srai_epi16(src_values4, 5); 36880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values5 = _mm_srai_epi16(src_values5, 5); 36890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values6 = _mm_srai_epi16(src_values6, 5); 36900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values7 = _mm_srai_epi16(src_values7, 5); 36910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 36920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* converting 16 bit to 8 bit */ 36930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values0 = _mm_packus_epi16(src_values0, src_values4); 36940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values1 = _mm_packus_epi16(src_values1, src_values5); 36950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values2 = _mm_packus_epi16(src_values2, src_values6); 36960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values3 = _mm_packus_epi16(src_values3, src_values7); 36970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 36980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* loading 8-bit 8 pixels values */ 36990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + col + (0) * dst_strd), src_values0); /* row=0*/ 37000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + col + (1) * dst_strd), 
src_values1); /* row=1*/ 37010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + col + (2) * dst_strd), src_values2); /* row=2*/ 37020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + col + (3) * dst_strd), src_values3); /* row=3*/ 37030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 37040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 37050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values0 = _mm_loadu_si128((__m128i *)(pu1_ref + ref_main_idx[4] + col)); 37060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values1 = _mm_loadu_si128((__m128i *)(pu1_ref + ref_main_idx[5] + col)); 37070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values2 = _mm_loadu_si128((__m128i *)(pu1_ref + ref_main_idx[6] + col)); 37080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values3 = _mm_loadu_si128((__m128i *)(pu1_ref + ref_main_idx[7] + col)); 37090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values4 = _mm_loadu_si128((__m128i *)(pu1_ref + ref_main_idx[4] + 8 + col)); 37100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values5 = _mm_loadu_si128((__m128i *)(pu1_ref + ref_main_idx[5] + 8 + col)); 37110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values6 = _mm_loadu_si128((__m128i *)(pu1_ref + ref_main_idx[6] + 8 + col)); 37120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values7 = _mm_loadu_si128((__m128i *)(pu1_ref + ref_main_idx[7] + 8 + col)); 37130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 37140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values0 = _mm_shuffle_epi8(src_values0, sm3); 37150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values1 = _mm_shuffle_epi8(src_values1, sm3); 37160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values2 = _mm_shuffle_epi8(src_values2, sm3); 
37170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values3 = _mm_shuffle_epi8(src_values3, sm3); 37180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values4 = _mm_shuffle_epi8(src_values4, sm3); 37190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values5 = _mm_shuffle_epi8(src_values5, sm3); 37200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values6 = _mm_shuffle_epi8(src_values6, sm3); 37210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values7 = _mm_shuffle_epi8(src_values7, sm3); 37220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 37230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 37240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values0 = _mm_maddubs_epi16(src_values0, temp11); 37250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values1 = _mm_maddubs_epi16(src_values1, temp12); 37260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values2 = _mm_maddubs_epi16(src_values2, temp13); 37270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values3 = _mm_maddubs_epi16(src_values3, temp14); 37280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values4 = _mm_maddubs_epi16(src_values4, temp11); 37290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values5 = _mm_maddubs_epi16(src_values5, temp12); 37300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values6 = _mm_maddubs_epi16(src_values6, temp13); 37310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values7 = _mm_maddubs_epi16(src_values7, temp14); 37320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 37330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /*((32 - fract)* pu1_ref[ref_main_idx]+ fract * pu1_ref[ref_main_idx + 1] + 16)*/ 37340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values0 = _mm_add_epi16(src_values0, const_temp_8x16b); 
37350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values1 = _mm_add_epi16(src_values1, const_temp_8x16b); 37360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values2 = _mm_add_epi16(src_values2, const_temp_8x16b); 37370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values3 = _mm_add_epi16(src_values3, const_temp_8x16b); 37380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values4 = _mm_add_epi16(src_values4, const_temp_8x16b); 37390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values5 = _mm_add_epi16(src_values5, const_temp_8x16b); 37400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values6 = _mm_add_epi16(src_values6, const_temp_8x16b); 37410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values7 = _mm_add_epi16(src_values7, const_temp_8x16b); 37420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 37430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /*((32 - fract)* pu1_ref[ref_main_idx]+ fract * pu1_ref[ref_main_idx + 1] + 16) >>5*/ 37440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values0 = _mm_srai_epi16(src_values0, 5); 37450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values1 = _mm_srai_epi16(src_values1, 5); 37460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values2 = _mm_srai_epi16(src_values2, 5); 37470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values3 = _mm_srai_epi16(src_values3, 5); 37480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values4 = _mm_srai_epi16(src_values4, 5); 37490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values5 = _mm_srai_epi16(src_values5, 5); 37500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values6 = _mm_srai_epi16(src_values6, 5); 37510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values7 = _mm_srai_epi16(src_values7, 5); 37520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish 
Mahendrakar 37530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* converting 16 bit to 8 bit */ 37540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values0 = _mm_packus_epi16(src_values0, src_values4); 37550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values1 = _mm_packus_epi16(src_values1, src_values5); 37560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values2 = _mm_packus_epi16(src_values2, src_values6); 37570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values3 = _mm_packus_epi16(src_values3, src_values7); 37580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 37590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* loading 8-bit 8 pixels values */ 37600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + col + (4) * dst_strd), src_values0); /* row=4*/ 37610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + col + (5) * dst_strd), src_values1); /* row=5*/ 37620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + col + (6) * dst_strd), src_values2); /* row=6*/ 37630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + col + (7) * dst_strd), src_values3); /* row=7*/ 37640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 37650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 37660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pu1_dst += 8 * dst_strd; 37670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 37680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 37690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 37700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar else if(nt == 16) /* for nt = 16 case */ 37710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 37720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 
37730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i const_temp2_4x32b, const_temp3_4x32b, const_temp8_4x32b; 37740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i src_values10, src_values11, intra_pred_ang_4x32b; 37750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i row_4x32b, two_nt_4x32b, src_values12; 37760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 37770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 37780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar const_temp2_4x32b = _mm_set1_epi16(31); 37790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar const_temp3_4x32b = _mm_set1_epi16(32); 37800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar const_temp8_4x32b = _mm_set1_epi16(8); 37810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 37820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar two_nt_4x32b = _mm_set1_epi16(two_nt + 1); 37830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 37840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* intra_pred_ang = gai4_ihevc_ang_table[mode]; */ 37850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar intra_pred_ang_4x32b = _mm_set1_epi16(intra_pred_ang); 37860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 37870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar row_4x32b = _mm_set_epi16(8, 7, 6, 5, 4, 3, 2, 1); 37880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 37890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar for(row = 0; row < nt; row += 8) 37900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 37910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 37920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD16 ref_main_idx[9]; 37930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 37940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i res_temp5_4x32b; 
37950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i fract1_8x16b, fract2_8x16b; 37960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i src_values0, src_values1, src_values2, src_values3; 37970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i src_values4, src_values5, src_values6, src_values7; 37980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 37990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* pos = ((row + 1) * intra_pred_ang); */ 38000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar res_temp5_4x32b = _mm_mullo_epi16(row_4x32b, intra_pred_ang_4x32b); 38010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 38020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* fract = pos & (31); */ 38030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values12 = _mm_add_epi16(two_nt_4x32b, _mm_srai_epi16(res_temp5_4x32b, 5)); 38040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 38050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* idx = pos >> 5; */ 38060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values11 = _mm_and_si128(res_temp5_4x32b, const_temp2_4x32b); 38070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 38080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /*(32 - fract) */ 38090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values10 = _mm_sub_epi16(const_temp3_4x32b, src_values11); 38100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 38110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar fract1_8x16b = _mm_slli_epi16(src_values11, 8); 38120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar fract2_8x16b = _mm_slli_epi16(src_values10, 8); 38130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 38140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values11 = _mm_or_si128(src_values11, fract1_8x16b); 
38150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values10 = _mm_or_si128(src_values10, fract2_8x16b); /*(32 - fract) */ 38160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 38170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar fract2_8x16b = _mm_unpackhi_epi8(src_values11, src_values10); 38180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar fract1_8x16b = _mm_unpacklo_epi8(src_values11, src_values10); 38190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 38200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp1 = _mm_shuffle_epi32(fract1_8x16b, 0x00); 38210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp2 = _mm_shuffle_epi32(fract1_8x16b, 0x55); 38220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp3 = _mm_shuffle_epi32(fract1_8x16b, 0xaa); 38230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp4 = _mm_shuffle_epi32(fract1_8x16b, 0xff); 38240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 38250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp11 = _mm_shuffle_epi32(fract2_8x16b, 0x00); 38260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp12 = _mm_shuffle_epi32(fract2_8x16b, 0x55); 38270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp13 = _mm_shuffle_epi32(fract2_8x16b, 0xaa); 38280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp14 = _mm_shuffle_epi32(fract2_8x16b, 0xff); 38290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 38300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar row_4x32b = _mm_add_epi16(row_4x32b, const_temp8_4x32b); 38310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)ref_main_idx, src_values12); 38320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 38330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 38340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values0 = _mm_loadu_si128((__m128i 
*)(pu1_ref + ref_main_idx[0])); 38350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values1 = _mm_loadu_si128((__m128i *)(pu1_ref + ref_main_idx[1])); 38360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values2 = _mm_loadu_si128((__m128i *)(pu1_ref + ref_main_idx[2])); 38370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values3 = _mm_loadu_si128((__m128i *)(pu1_ref + ref_main_idx[3])); 38380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values4 = _mm_loadu_si128((__m128i *)(pu1_ref + ref_main_idx[0] + 8)); 38390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values5 = _mm_loadu_si128((__m128i *)(pu1_ref + ref_main_idx[1] + 8)); 38400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values6 = _mm_loadu_si128((__m128i *)(pu1_ref + ref_main_idx[2] + 8)); 38410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values7 = _mm_loadu_si128((__m128i *)(pu1_ref + ref_main_idx[3] + 8)); 38420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 38430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values0 = _mm_shuffle_epi8(src_values0, sm3); 38440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values1 = _mm_shuffle_epi8(src_values1, sm3); 38450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values2 = _mm_shuffle_epi8(src_values2, sm3); 38460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values3 = _mm_shuffle_epi8(src_values3, sm3); 38470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values4 = _mm_shuffle_epi8(src_values4, sm3); 38480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values5 = _mm_shuffle_epi8(src_values5, sm3); 38490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values6 = _mm_shuffle_epi8(src_values6, sm3); 38500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values7 = _mm_shuffle_epi8(src_values7, sm3); 
38510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 38520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 38530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values0 = _mm_maddubs_epi16(src_values0, temp1); 38540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values1 = _mm_maddubs_epi16(src_values1, temp2); 38550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values2 = _mm_maddubs_epi16(src_values2, temp3); 38560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values3 = _mm_maddubs_epi16(src_values3, temp4); 38570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values4 = _mm_maddubs_epi16(src_values4, temp1); 38580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values5 = _mm_maddubs_epi16(src_values5, temp2); 38590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values6 = _mm_maddubs_epi16(src_values6, temp3); 38600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values7 = _mm_maddubs_epi16(src_values7, temp4); 38610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 38620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /*((32 - fract)* pu1_ref[ref_main_idx]+ fract * pu1_ref[ref_main_idx + 1] + 16)*/ 38630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values0 = _mm_add_epi16(src_values0, const_temp_8x16b); 38640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values1 = _mm_add_epi16(src_values1, const_temp_8x16b); 38650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values2 = _mm_add_epi16(src_values2, const_temp_8x16b); 38660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values3 = _mm_add_epi16(src_values3, const_temp_8x16b); 38670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values4 = _mm_add_epi16(src_values4, const_temp_8x16b); 38680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values5 = _mm_add_epi16(src_values5, const_temp_8x16b); 
38690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values6 = _mm_add_epi16(src_values6, const_temp_8x16b); 38700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values7 = _mm_add_epi16(src_values7, const_temp_8x16b); 38710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 38720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /*((32 - fract)* pu1_ref[ref_main_idx]+ fract * pu1_ref[ref_main_idx + 1] + 16) >>5*/ 38730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values0 = _mm_srai_epi16(src_values0, 5); 38740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values1 = _mm_srai_epi16(src_values1, 5); 38750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values2 = _mm_srai_epi16(src_values2, 5); 38760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values3 = _mm_srai_epi16(src_values3, 5); 38770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values4 = _mm_srai_epi16(src_values4, 5); 38780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values5 = _mm_srai_epi16(src_values5, 5); 38790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values6 = _mm_srai_epi16(src_values6, 5); 38800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values7 = _mm_srai_epi16(src_values7, 5); 38810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 38820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* converting 16 bit to 8 bit */ 38830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values0 = _mm_packus_epi16(src_values0, src_values4); 38840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values1 = _mm_packus_epi16(src_values1, src_values5); 38850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values2 = _mm_packus_epi16(src_values2, src_values6); 38860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values3 = _mm_packus_epi16(src_values3, src_values7); 
38870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 38880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* loading 8-bit 8 pixels values */ 38890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + (0) * dst_strd), src_values0); /* row=0*/ 38900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + (1) * dst_strd), src_values1); /* row=1*/ 38910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + (2) * dst_strd), src_values2); /* row=2*/ 38920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + (3) * dst_strd), src_values3); /* row=3*/ 38930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 38940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 38950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values0 = _mm_loadu_si128((__m128i *)(pu1_ref + ref_main_idx[4])); 38960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values1 = _mm_loadu_si128((__m128i *)(pu1_ref + ref_main_idx[5])); 38970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values2 = _mm_loadu_si128((__m128i *)(pu1_ref + ref_main_idx[6])); 38980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values3 = _mm_loadu_si128((__m128i *)(pu1_ref + ref_main_idx[7])); 38990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values4 = _mm_loadu_si128((__m128i *)(pu1_ref + ref_main_idx[4] + 8)); 39000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values5 = _mm_loadu_si128((__m128i *)(pu1_ref + ref_main_idx[5] + 8)); 39010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values6 = _mm_loadu_si128((__m128i *)(pu1_ref + ref_main_idx[6] + 8)); 39020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values7 = _mm_loadu_si128((__m128i *)(pu1_ref + ref_main_idx[7] + 8)); 
39030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 39040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values0 = _mm_shuffle_epi8(src_values0, sm3); 39050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values1 = _mm_shuffle_epi8(src_values1, sm3); 39060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values2 = _mm_shuffle_epi8(src_values2, sm3); 39070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values3 = _mm_shuffle_epi8(src_values3, sm3); 39080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values4 = _mm_shuffle_epi8(src_values4, sm3); 39090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values5 = _mm_shuffle_epi8(src_values5, sm3); 39100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values6 = _mm_shuffle_epi8(src_values6, sm3); 39110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values7 = _mm_shuffle_epi8(src_values7, sm3); 39120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 39130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 39140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values0 = _mm_maddubs_epi16(src_values0, temp11); 39150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values1 = _mm_maddubs_epi16(src_values1, temp12); 39160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values2 = _mm_maddubs_epi16(src_values2, temp13); 39170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values3 = _mm_maddubs_epi16(src_values3, temp14); 39180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values4 = _mm_maddubs_epi16(src_values4, temp11); 39190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values5 = _mm_maddubs_epi16(src_values5, temp12); 39200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values6 = _mm_maddubs_epi16(src_values6, temp13); 39210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values7 = 
_mm_maddubs_epi16(src_values7, temp14); 39220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 39230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /*((32 - fract)* pu1_ref[ref_main_idx]+ fract * pu1_ref[ref_main_idx + 1] + 16)*/ 39240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values0 = _mm_add_epi16(src_values0, const_temp_8x16b); 39250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values1 = _mm_add_epi16(src_values1, const_temp_8x16b); 39260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values2 = _mm_add_epi16(src_values2, const_temp_8x16b); 39270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values3 = _mm_add_epi16(src_values3, const_temp_8x16b); 39280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values4 = _mm_add_epi16(src_values4, const_temp_8x16b); 39290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values5 = _mm_add_epi16(src_values5, const_temp_8x16b); 39300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values6 = _mm_add_epi16(src_values6, const_temp_8x16b); 39310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values7 = _mm_add_epi16(src_values7, const_temp_8x16b); 39320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 39330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /*((32 - fract)* pu1_ref[ref_main_idx]+ fract * pu1_ref[ref_main_idx + 1] + 16) >>5*/ 39340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values0 = _mm_srai_epi16(src_values0, 5); 39350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values1 = _mm_srai_epi16(src_values1, 5); 39360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values2 = _mm_srai_epi16(src_values2, 5); 39370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values3 = _mm_srai_epi16(src_values3, 5); 39380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values4 = _mm_srai_epi16(src_values4, 5); 
39390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values5 = _mm_srai_epi16(src_values5, 5); 39400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values6 = _mm_srai_epi16(src_values6, 5); 39410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values7 = _mm_srai_epi16(src_values7, 5); 39420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 39430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* converting 16 bit to 8 bit */ 39440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values0 = _mm_packus_epi16(src_values0, src_values4); 39450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values1 = _mm_packus_epi16(src_values1, src_values5); 39460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values2 = _mm_packus_epi16(src_values2, src_values6); 39470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values3 = _mm_packus_epi16(src_values3, src_values7); 39480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 39490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* loading 8-bit 8 pixels values */ 39500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + (4) * dst_strd), src_values0); /* row=4*/ 39510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + (5) * dst_strd), src_values1); /* row=5*/ 39520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + (6) * dst_strd), src_values2); /* row=6*/ 39530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)(pu1_dst + (7) * dst_strd), src_values3); /* row=7*/ 39540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 39550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 39560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pu1_dst += 8 * dst_strd; 39570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 
39580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 39590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 39600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar else if(nt == 8) 39610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 39620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 39630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i const_temp2_4x32b, const_temp3_4x32b; 39640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i src_values10, src_values11, intra_pred_ang_4x32b; 39650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i row_4x32b, two_nt_4x32b, src_values12; 39660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 39670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 39680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar const_temp2_4x32b = _mm_set1_epi16(31); 39690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar const_temp3_4x32b = _mm_set1_epi16(32); 39700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 39710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar two_nt_4x32b = _mm_set1_epi16(two_nt + 1); 39720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 39730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 39740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* intra_pred_ang = gai4_ihevc_ang_table[mode]; */ 39750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar intra_pred_ang_4x32b = _mm_set1_epi16(intra_pred_ang); 39760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 39770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar row_4x32b = _mm_set_epi16(8, 7, 6, 5, 4, 3, 2, 1); 39780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 39790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar //for(row = 0; row < nt; row +=4) 39800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 39810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 
39820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD16 ref_main_idx[9]; 39830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 39840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i res_temp5_4x32b; 39850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i fract1_8x16b, fract2_8x16b; 39860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i src_values0, src_values1, src_values2, src_values3; 39870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i src_values4, src_values5, src_values6, src_values7; 39880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 39890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* pos = ((row + 1) * intra_pred_ang); */ 39900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar res_temp5_4x32b = _mm_mullo_epi16(row_4x32b, intra_pred_ang_4x32b); 39910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 39920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* fract = pos & (31); */ 39930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values12 = _mm_add_epi16(two_nt_4x32b, _mm_srai_epi16(res_temp5_4x32b, 5)); 39940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 39950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* idx = pos >> 5; */ 39960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values11 = _mm_and_si128(res_temp5_4x32b, const_temp2_4x32b); 39970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 39980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /*(32 - fract) */ 39990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values10 = _mm_sub_epi16(const_temp3_4x32b, src_values11); 40000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 40010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar fract1_8x16b = _mm_slli_epi16(src_values11, 8); 40020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar fract2_8x16b = 
_mm_slli_epi16(src_values10, 8); 40030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 40040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values11 = _mm_or_si128(src_values11, fract1_8x16b); 40050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values10 = _mm_or_si128(src_values10, fract2_8x16b); /*(32 - fract) */ 40060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 40070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar fract2_8x16b = _mm_unpackhi_epi8(src_values11, src_values10); 40080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar fract1_8x16b = _mm_unpacklo_epi8(src_values11, src_values10); 40090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 40100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp1 = _mm_shuffle_epi32(fract1_8x16b, 0x00); 40110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp2 = _mm_shuffle_epi32(fract1_8x16b, 0x55); 40120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp3 = _mm_shuffle_epi32(fract1_8x16b, 0xaa); 40130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp4 = _mm_shuffle_epi32(fract1_8x16b, 0xff); 40140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 40150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp11 = _mm_shuffle_epi32(fract2_8x16b, 0x00); 40160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp12 = _mm_shuffle_epi32(fract2_8x16b, 0x55); 40170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp13 = _mm_shuffle_epi32(fract2_8x16b, 0xaa); 40180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp14 = _mm_shuffle_epi32(fract2_8x16b, 0xff); 40190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 40200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storeu_si128((__m128i *)ref_main_idx, src_values12); 40210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 40220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values0 = 
_mm_loadu_si128((__m128i *)(pu1_ref + ref_main_idx[0])); /* col = 0-7 */ 40230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values1 = _mm_loadu_si128((__m128i *)(pu1_ref + ref_main_idx[1])); /* col = 8-15 */ 40240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values2 = _mm_loadu_si128((__m128i *)(pu1_ref + ref_main_idx[2])); /* col = 16-23 */ 40250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values3 = _mm_loadu_si128((__m128i *)(pu1_ref + ref_main_idx[3])); /* col = 24-31 */ 40260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values4 = _mm_loadu_si128((__m128i *)(pu1_ref + ref_main_idx[4])); /* col = 32-39 */ 40270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values5 = _mm_loadu_si128((__m128i *)(pu1_ref + ref_main_idx[5])); /* col = 40-47 */ 40280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values6 = _mm_loadu_si128((__m128i *)(pu1_ref + ref_main_idx[6])); /* col = 48-55 */ 40290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values7 = _mm_loadu_si128((__m128i *)(pu1_ref + ref_main_idx[7])); /* col = 56-63*/ 40300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 40310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values0 = _mm_shuffle_epi8(src_values0, sm3); 40320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values1 = _mm_shuffle_epi8(src_values1, sm3); 40330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values2 = _mm_shuffle_epi8(src_values2, sm3); 40340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values3 = _mm_shuffle_epi8(src_values3, sm3); 40350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values4 = _mm_shuffle_epi8(src_values4, sm3); 40360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values5 = _mm_shuffle_epi8(src_values5, sm3); 40370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values6 = _mm_shuffle_epi8(src_values6, sm3); 
40380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values7 = _mm_shuffle_epi8(src_values7, sm3); 40390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 40400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 40410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values0 = _mm_maddubs_epi16(src_values0, temp1); 40420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values1 = _mm_maddubs_epi16(src_values1, temp2); 40430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values2 = _mm_maddubs_epi16(src_values2, temp3); 40440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values3 = _mm_maddubs_epi16(src_values3, temp4); 40450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values4 = _mm_maddubs_epi16(src_values4, temp11); 40460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values5 = _mm_maddubs_epi16(src_values5, temp12); 40470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values6 = _mm_maddubs_epi16(src_values6, temp13); 40480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values7 = _mm_maddubs_epi16(src_values7, temp14); 40490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 40500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /*((32 - fract)* pu1_ref[ref_main_idx]+ fract * pu1_ref[ref_main_idx + 1] + 16)*/ 40510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values0 = _mm_add_epi16(src_values0, const_temp_8x16b); 40520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values1 = _mm_add_epi16(src_values1, const_temp_8x16b); 40530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values2 = _mm_add_epi16(src_values2, const_temp_8x16b); 40540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values3 = _mm_add_epi16(src_values3, const_temp_8x16b); 40550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values4 = _mm_add_epi16(src_values4, const_temp_8x16b); 
40560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values5 = _mm_add_epi16(src_values5, const_temp_8x16b); 40570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values6 = _mm_add_epi16(src_values6, const_temp_8x16b); 40580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values7 = _mm_add_epi16(src_values7, const_temp_8x16b); 40590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 40600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /*((32 - fract)* pu1_ref[ref_main_idx]+ fract * pu1_ref[ref_main_idx + 1] + 16) >>5*/ 40610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values0 = _mm_srai_epi16(src_values0, 5); 40620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values1 = _mm_srai_epi16(src_values1, 5); 40630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values2 = _mm_srai_epi16(src_values2, 5); 40640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values3 = _mm_srai_epi16(src_values3, 5); 40650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values4 = _mm_srai_epi16(src_values4, 5); 40660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values5 = _mm_srai_epi16(src_values5, 5); 40670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values6 = _mm_srai_epi16(src_values6, 5); 40680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values7 = _mm_srai_epi16(src_values7, 5); 40690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 40700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* converting 16 bit to 8 bit */ 40710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values0 = _mm_packus_epi16(src_values0, src_values1); 40720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values2 = _mm_packus_epi16(src_values2, src_values3); 40730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values1 = _mm_srli_si128(src_values0, 8); 
40740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values3 = _mm_srli_si128(src_values2, 8); 40750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values4 = _mm_packus_epi16(src_values4, src_values5); 40760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values6 = _mm_packus_epi16(src_values6, src_values7); 40770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values5 = _mm_srli_si128(src_values4, 8); 40780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values7 = _mm_srli_si128(src_values6, 8); 40790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 40800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* loading 8-bit 8 pixels values */ 40810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storel_epi64((__m128i *)(pu1_dst + 0 * dst_strd), src_values0); /* row=0*/ 40820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storel_epi64((__m128i *)(pu1_dst + 1 * dst_strd), src_values1); /* row=1*/ 40830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storel_epi64((__m128i *)(pu1_dst + 2 * dst_strd), src_values2); /* row=2*/ 40840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storel_epi64((__m128i *)(pu1_dst + 3 * dst_strd), src_values3); /* row=3*/ 40850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storel_epi64((__m128i *)(pu1_dst + 4 * dst_strd), src_values4); /* row=4*/ 40860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storel_epi64((__m128i *)(pu1_dst + 5 * dst_strd), src_values5); /* row=5*/ 40870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storel_epi64((__m128i *)(pu1_dst + 6 * dst_strd), src_values6); /* row=6*/ 40880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar _mm_storel_epi64((__m128i *)(pu1_dst + 7 * dst_strd), src_values7); /* row=7*/ 40890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 40900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 
40910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 40920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar else /* if nt =4*/ 40930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 40940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 40950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i const_temp2_4x32b, const_temp3_4x32b; 40960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i src_values10, src_values11, intra_pred_ang_4x32b; 40970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 40980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i row_4x32b, two_nt_4x32b, src_values12; 40990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 41000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 41010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar const_temp2_4x32b = _mm_set1_epi32(31); 41020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar const_temp3_4x32b = _mm_set1_epi32(32); 41030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 41040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar two_nt_4x32b = _mm_set1_epi32(two_nt + 1); 41050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 41060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 41070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* intra_pred_ang = gai4_ihevc_ang_table[mode]; */ 41080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar intra_pred_ang_4x32b = _mm_set1_epi32(intra_pred_ang); 41090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 41100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar row_4x32b = _mm_set_epi32(4, 3, 2, 1); 41110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 41120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar int temp11, temp21, temp31, temp41; 41130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 41140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 
ref_main_idx1, ref_main_idx2, ref_main_idx3, ref_main_idx4; 41150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 41160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i fract1_8x16b, fract2_8x16b, res_temp5_4x32b; 41170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i src_values0, src_values1, src_values2, src_values3; 41180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar __m128i ref_main_temp0, ref_main_temp1, ref_main_temp2; 41190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 41200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* pos = ((row + 1) * intra_pred_ang); */ 41210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar res_temp5_4x32b = _mm_mullo_epi32(row_4x32b, intra_pred_ang_4x32b); 41220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 41230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* fract = pos & (31); */ 41240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values12 = _mm_add_epi32(two_nt_4x32b, _mm_srai_epi32(res_temp5_4x32b, 5)); 41250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 41260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ref_main_temp0 = _mm_srli_si128(src_values12, 4); /* next 32 bit values */ 41270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ref_main_temp1 = _mm_srli_si128(src_values12, 8); /* next 32 bit values */ 41280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ref_main_temp2 = _mm_srli_si128(src_values12, 12); /* next 32 bit values */ 41290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ref_main_idx1 = _mm_cvtsi128_si32(src_values12); /* row=0*/ 41300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ref_main_idx2 = _mm_cvtsi128_si32(ref_main_temp0); /* row=1*/ 41310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar ref_main_idx3 = _mm_cvtsi128_si32(ref_main_temp1); /* row=2*/ 41320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 
ref_main_idx4 = _mm_cvtsi128_si32(ref_main_temp2); /* row=3*/ 41330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 41340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* idx = pos >> 5; */ 41350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values11 = _mm_and_si128(res_temp5_4x32b, const_temp2_4x32b); 41360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 41370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /*(32 - fract) */ 41380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values10 = _mm_sub_epi32(const_temp3_4x32b, src_values11); 41390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 41400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar fract1_8x16b = _mm_slli_epi16(src_values11, 8); 41410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar fract2_8x16b = _mm_slli_epi16(src_values10, 8); 41420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 41430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values11 = _mm_or_si128(src_values11, fract1_8x16b); 41440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values10 = _mm_or_si128(src_values10, fract2_8x16b); /*(32 - fract) */ 41450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 41460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar fract2_8x16b = _mm_unpackhi_epi8(src_values11, src_values10); 41470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar fract1_8x16b = _mm_unpacklo_epi8(src_values11, src_values10); 41480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 41490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp1 = _mm_shuffle_epi32(fract1_8x16b, 0x00); 41500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp2 = _mm_shuffle_epi32(fract1_8x16b, 0xaa); 41510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp3 = _mm_shuffle_epi32(fract2_8x16b, 0x00); 41520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp4 = 
_mm_shuffle_epi32(fract2_8x16b, 0xaa); 41530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 41540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values0 = _mm_loadu_si128((__m128i *)(pu1_ref + ref_main_idx1)); /* col = 0-7 */ 41550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values1 = _mm_loadu_si128((__m128i *)(pu1_ref + ref_main_idx2)); /* col = 8-15 */ 41560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values2 = _mm_loadu_si128((__m128i *)(pu1_ref + ref_main_idx3)); /* col = 16-23 */ 41570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values3 = _mm_loadu_si128((__m128i *)(pu1_ref + ref_main_idx4)); /* col = 24-31 */ 41580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 41590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values0 = _mm_shuffle_epi8(src_values0, sm3); 41600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values1 = _mm_shuffle_epi8(src_values1, sm3); 41610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values2 = _mm_shuffle_epi8(src_values2, sm3); 41620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values3 = _mm_shuffle_epi8(src_values3, sm3); 41630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 41640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values0 = _mm_maddubs_epi16(src_values0, temp1); 41650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values1 = _mm_maddubs_epi16(src_values1, temp2); 41660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values2 = _mm_maddubs_epi16(src_values2, temp3); 41670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values3 = _mm_maddubs_epi16(src_values3, temp4); 41680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 41690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /*((32 - fract)* pu1_ref[ref_main_idx]+ fract * pu1_ref[ref_main_idx + 1] + 16)*/ 41700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish 
Mahendrakar src_values0 = _mm_add_epi16(src_values0, const_temp_8x16b); 41710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values1 = _mm_add_epi16(src_values1, const_temp_8x16b); 41720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values2 = _mm_add_epi16(src_values2, const_temp_8x16b); 41730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values3 = _mm_add_epi16(src_values3, const_temp_8x16b); 41740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 41750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /*((32 - fract)* pu1_ref[ref_main_idx]+ fract * pu1_ref[ref_main_idx + 1] + 16) >>5*/ 41760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values0 = _mm_srai_epi16(src_values0, 5); 41770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values1 = _mm_srai_epi16(src_values1, 5); 41780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values2 = _mm_srai_epi16(src_values2, 5); 41790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values3 = _mm_srai_epi16(src_values3, 5); 41800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 41810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* converting 16 bit to 8 bit */ 41820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values0 = _mm_packus_epi16(src_values0, src_values1); 41830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values2 = _mm_packus_epi16(src_values2, src_values3); 41840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values1 = _mm_srli_si128(src_values0, 8); 41850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar src_values3 = _mm_srli_si128(src_values2, 8); 41860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 41870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp11 = _mm_cvtsi128_si32(src_values0); 41880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp21 = _mm_cvtsi128_si32(src_values1); 
41890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp31 = _mm_cvtsi128_si32(src_values2); 41900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar temp41 = _mm_cvtsi128_si32(src_values3); 41910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 41920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* loding 4-bit 8 pixels values */ 41930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar *(WORD32 *)(&pu1_dst[(0 * dst_strd)]) = temp11; 41940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar *(WORD32 *)(&pu1_dst[(1 * dst_strd)]) = temp21; 41950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar *(WORD32 *)(&pu1_dst[(2 * dst_strd)]) = temp31; 41960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar *(WORD32 *)(&pu1_dst[(3 * dst_strd)]) = temp41; 41970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 41980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 41990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 42000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar} 42010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4202