10d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar/****************************************************************************** 20d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 30d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore 40d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 50d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* Licensed under the Apache License, Version 2.0 (the "License"); 60d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* you may not use this file except in compliance with the License. 70d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* You may obtain a copy of the License at: 80d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 90d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* http://www.apache.org/licenses/LICENSE-2.0 100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* Unless required by applicable law or agreed to in writing, software 120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* distributed under the License is distributed on an "AS IS" BASIS, 130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* See the License for the specific language governing permissions and 150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* limitations under the License. 
160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar******************************************************************************/ 180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar/** 190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar******************************************************************************* 200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @file 210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* ihevc_weighted_pred_neon_intr.c 220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @brief 240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* Contains function definitions for weighted prediction used in inter 250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* prediction 260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @author 280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* Parthiban V 290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @par List of Functions: 310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* - ihevc_weighted_pred_uni() 320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* - ihevc_weighted_pred_bi() 330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* - ihevc_weighted_pred_bi_default() 340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @remarks 360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* None 370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar******************************************************************************* 390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar*/ 
400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar/*****************************************************************************/ 410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar/* File Includes */ 420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar/*****************************************************************************/ 430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar#include "ihevc_typedefs.h" 440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar#include "ihevc_defs.h" 450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar#include "ihevc_macros.h" 460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar#include "ihevc_func_selector.h" 470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar#include "ihevc_inter_pred.h" 480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar#include "arm_neon.h" 490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar/** 520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar******************************************************************************* 530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @brief 550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* Does uni-weighted prediction on the array pointed by pi2_src and stores 560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* it at the location pointed by pi2_dst Assumptions : The function is 570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* optimized considering the fact Width and height are multiple of 2. 
*
* @par Description:
*  dst = ( ( (src + lvl_shift) * wgt0 + (1 << (shift - 1)) ) >> shift )
*        + offset
*
* @param[in] pi2_src
*  Pointer to the source
*
* @param[out] pu1_dst
*  Pointer to the destination
*
* @param[in] src_strd
*  Source stride
*
* @param[in] dst_strd
*  Destination stride
*
* @param[in] wgt0
*  weight to be multiplied to the source
*
* @param[in] off0
*  offset to be added after rounding and shifting
*
840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] shift 850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* (14 Bit depth) + log2_weight_denominator 860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] lvl_shift 880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* added before shift and offset 890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] ht 910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* height of the source 920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] wd 940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* width of the source 950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @returns 970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @remarks 990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* None 1000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 1010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar******************************************************************************* 1020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar*/ 1030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarvoid ihevc_weighted_pred_uni_neonintr(WORD16 *pi2_src, 1050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar UWORD8 *pu1_dst, 1060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 src_strd, 1070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 dst_strd, 1080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 wgt0, 
1090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 off0, 1100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 shift, 1110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 lvl_shift, 1120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 ht, 1130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 wd) 1140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar{ 1150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 row, col; 1160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar int16x4_t pi2_src_val1; 1170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar int16x4_t pi2_src_val2; 1180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar int32x4_t i4_tmp1_t; 1190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar int32x4_t i4_tmp2_t; 1200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar int32x4_t sto_res_tmp1; 1210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar uint16x4_t sto_res_tmp2; 1220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar uint16x8_t sto_res_tmp3; 1230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar uint8x8_t sto_res; 1240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar int32x4_t tmp_lvl_shift_t; 1250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 tmp_shift = 0 - shift; 1260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar int32x4_t tmp_shift_t; 1270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD16 *pi2_src_tmp; 1280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar UWORD8 *pu1_dst_tmp; 1290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 tmp_lvl_shift = lvl_shift * wgt0 + (off0 << shift); 1310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar tmp_lvl_shift += (1 << (shift - 1)); 1320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar tmp_lvl_shift_t = 
vmovq_n_s32(tmp_lvl_shift); 1330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar tmp_shift_t = vmovq_n_s32(tmp_shift); 1340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* Used i4_tmp1_t & i4_tmp1_t to process 2 rows at a time. */ 1360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* height has also been unrolled, hence 2 rows will processed at a time */ 1370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* store also has been taken care for two row process */ 1380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* vcombine_u16 has been used since after narrowing we get 16x4 value which can't be */ 1390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* saturated and narrowed */ 1400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar for(row = ht; row > 0; row -= 2) 1420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 1430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar for(col = wd; col > 0; col -= 4) 1440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 1450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pi2_src_tmp = pi2_src + src_strd; 1460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pu1_dst_tmp = pu1_dst + dst_strd; 1480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pi2_src_val1 = vld1_s16((int16_t *)pi2_src); 1500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pi2_src += 4; 1510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pi2_src_val2 = vld1_s16((int16_t *)pi2_src_tmp); 1530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar i4_tmp1_t = vmull_n_s16(pi2_src_val1, (int16_t)wgt0); 
1540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar i4_tmp1_t = vaddq_s32(i4_tmp1_t, tmp_lvl_shift_t); 1560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar i4_tmp2_t = vmull_n_s16(pi2_src_val2, (int16_t)wgt0); 1570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar sto_res_tmp1 = vshlq_s32(i4_tmp1_t, tmp_shift_t); 1590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar i4_tmp2_t = vaddq_s32(i4_tmp2_t, tmp_lvl_shift_t); 1600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar sto_res_tmp2 = vqmovun_s32(sto_res_tmp1); 1620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar sto_res_tmp3 = vcombine_u16(sto_res_tmp2, sto_res_tmp2); 1630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar sto_res_tmp1 = vshlq_s32(i4_tmp2_t, tmp_shift_t); 1650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar sto_res = vqmovn_u16(sto_res_tmp3); 1660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar sto_res_tmp2 = vqmovun_s32(sto_res_tmp1); 1680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar sto_res_tmp3 = vcombine_u16(sto_res_tmp2, sto_res_tmp2); 1690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vst1_lane_u32((uint32_t *)pu1_dst, vreinterpret_u32_u8(sto_res), 0); 1710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pu1_dst += 4; 1720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar sto_res = vqmovn_u16(sto_res_tmp3); 1740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vst1_lane_u32((uint32_t *)pu1_dst_tmp, vreinterpret_u32_u8(sto_res), 0); 
1750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 1760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pi2_src += 2 * src_strd - wd; 1770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pu1_dst += 2 * dst_strd - wd; 1780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 1790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar} 1800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//WEIGHTED_PRED_UNI 1810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 1820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar/** 1830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar******************************************************************************* 1840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 1850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @brief 1860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* Chroma uni-weighted prediction on the array pointed by pi2_src and stores 1870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* it at the location pointed by pi2_dst Assumptions : The function is 1880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* optimized considering the fact Width and height are multiple of 2. 
*
* @par Description:
*  dst = ( ( (src + lvl_shift) * wgt0 + (1 << (shift - 1)) ) >> shift )
*        + offset
*  evaluated with (wgt0_cb, off0_cb) and (wgt0_cr, off0_cr) on alternating
*  source samples
*
* @param[in] pi2_src
*  Pointer to the source
*
* @param[out] pu1_dst
*  Pointer to the destination
*
* @param[in] src_strd
*  Source stride
*
* @param[in] dst_strd
*  Destination stride
*
* @param[in] wgt0_cb
*  weight to be multiplied to the Cb source samples
*
* @param[in] wgt0_cr
*  weight to be multiplied to the Cr source samples
*
* @param[in] off0_cb
*  offset to be added to Cb after rounding and shifting
*
* @param[in] off0_cr
*  offset to be added to Cr after rounding and shifting
*
2140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 2150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] shift 2160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* (14 Bit depth) + log2_weight_denominator 2170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 2180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] lvl_shift 2190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* added before shift and offset 2200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 2210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] ht 2220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* height of the source 2230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 2240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] wd 2250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* width of the source 2260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 2270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @returns 2280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 2290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @remarks 2300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* None 2310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 2320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar******************************************************************************* 2330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar*/ 2340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarvoid ihevc_weighted_pred_chroma_uni_neonintr(WORD16 *pi2_src, 2360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar UWORD8 *pu1_dst, 2370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 src_strd, 2380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 dst_strd, 
2390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 wgt0_cb, 2400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 wgt0_cr, 2410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 off0_cb, 2420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 off0_cr, 2430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 shift, 2440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 lvl_shift, 2450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 ht, 2460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 wd) 2470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar{ 2480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 row, col; 2490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar int16x4_t pi2_src_val1; 2500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar int16x4_t pi2_src_val2; 2510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar int32x4_t i4_tmp1_t; 2520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar int32x4_t i4_tmp2_t; 2530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar int32x4_t sto_res_tmp1; 2540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar uint16x4_t sto_res_tmp2; 2550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar uint16x8_t sto_res_tmp3; 2560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar uint8x8_t sto_res; 2570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar int32x4_t tmp_lvl_shift_t_u, tmp_lvl_shift_t_v; 2580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar int32x4x2_t tmp_lvl_shift_t; 2590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 tmp_shift = 0 - shift; 2600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar int32x4_t tmp_shift_t; 2610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar int16x4_t tmp_wgt0_u, tmp_wgt0_v; 2620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar int16x4x2_t wgt0; 
2630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD16 *pi2_src_tmp; 2640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar UWORD8 *pu1_dst_tmp; 2650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 tmp_lvl_shift = lvl_shift * wgt0_cb + (off0_cb << shift); 2670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar tmp_lvl_shift += (1 << (shift - 1)); 2680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar tmp_lvl_shift_t_u = vmovq_n_s32(tmp_lvl_shift); 2690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar tmp_lvl_shift = lvl_shift * wgt0_cr + (off0_cr << shift); 2710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar tmp_lvl_shift += (1 << (shift - 1)); 2720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar tmp_lvl_shift_t_v = vmovq_n_s32(tmp_lvl_shift); 2730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar tmp_lvl_shift_t = vzipq_s32(tmp_lvl_shift_t_u, tmp_lvl_shift_t_v); 2750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar tmp_shift_t = vmovq_n_s32(tmp_shift); 2770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar tmp_wgt0_u = vdup_n_s16(wgt0_cb); 2790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar tmp_wgt0_v = vdup_n_s16(wgt0_cr); 2800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar wgt0 = vzip_s16(tmp_wgt0_u, tmp_wgt0_v); 2810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* Used i4_tmp1_t & i4_tmp1_t to process 2 rows at a time. 
*/ 2830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* height has also been unrolled, hence 2 rows will processed at a time */ 2840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* store also has been taken care for two row process */ 2850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* vcombine_u16 has been used since after narrowing we get 16x4 value which can't be */ 2860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* saturated and narrowed */ 2870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar for(row = ht; row > 0; row -= 2) 2890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 2900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar for(col = 2 * wd; col > 0; col -= 4) 2910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 2920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pi2_src_tmp = pi2_src + src_strd; 2930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pu1_dst_tmp = pu1_dst + dst_strd; 2950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pi2_src_val1 = vld1_s16((int16_t *)pi2_src); 2970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pi2_src += 4; 2980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 2990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pi2_src_val2 = vld1_s16((int16_t *)pi2_src_tmp); 3000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar i4_tmp1_t = vmull_s16(pi2_src_val1, wgt0.val[0]); 3010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar i4_tmp1_t = vaddq_s32(i4_tmp1_t, tmp_lvl_shift_t.val[0]); 3030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar i4_tmp2_t = vmull_s16(pi2_src_val2, wgt0.val[0]); 
3040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar sto_res_tmp1 = vshlq_s32(i4_tmp1_t, tmp_shift_t); 3060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar i4_tmp2_t = vaddq_s32(i4_tmp2_t, tmp_lvl_shift_t.val[0]); 3070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar sto_res_tmp2 = vqmovun_s32(sto_res_tmp1); 3090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar sto_res_tmp3 = vcombine_u16(sto_res_tmp2, sto_res_tmp2); 3100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar sto_res_tmp1 = vshlq_s32(i4_tmp2_t, tmp_shift_t); 3120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar sto_res = vqmovn_u16(sto_res_tmp3); 3130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar sto_res_tmp2 = vqmovun_s32(sto_res_tmp1); 3150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar sto_res_tmp3 = vcombine_u16(sto_res_tmp2, sto_res_tmp2); 3160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vst1_lane_u32((uint32_t *)pu1_dst, vreinterpret_u32_u8(sto_res), 0); 3180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pu1_dst += 4; 3190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar sto_res = vqmovn_u16(sto_res_tmp3); 3210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vst1_lane_u32((uint32_t *)pu1_dst_tmp, vreinterpret_u32_u8(sto_res), 0); 3220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 3230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pi2_src += 2 * src_strd - 2 * wd; 3240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pu1_dst += 2 * dst_strd - 2 * wd; 
3250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 3260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar} 3270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//WEIGHTED_PRED_CHROMA_UNI 3280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar/** 3300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar******************************************************************************* 3310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 3320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @brief 3330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* Does bi-weighted prediction on the arrays pointed by pi2_src1 and 3340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* pi2_src2 and stores it at location pointed by pi2_dst Assumptions : The 3350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* function is optimized considering the fact Width and height are multiple 3360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* of 2. 
3370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 3380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @par Description: 3390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* dst = ( (src1 + lvl_shift1)*wgt0 + (src2 + lvl_shift2)*wgt1 + (off0 + 3400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* off1 + 1) << (shift - 1) ) >> shift 3410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 3420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] pi2_src1 3430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* Pointer to source 1 3440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 3450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] pi2_src2 3460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* Pointer to source 2 3470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 3480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[out] pu1_dst 3490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* Pointer to destination 3500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 3510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] src_strd1 3520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* Source stride 1 3530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 3540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] src_strd2 3550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* Source stride 2 3560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 3570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] dst_strd 3580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* Destination stride 3590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 3600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] wgt0 3610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* weight to be multiplied to source 1 
3620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 3630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] off0 3640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* offset 0 3650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 3660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] wgt1 3670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* weight to be multiplied to source 2 3680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 3690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] off1 3700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* offset 1 3710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 3720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] shift 3730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* (14 Bit depth) + log2_weight_denominator 3740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 3750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] lvl_shift1 3760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* added before shift and offset 3770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 3780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] lvl_shift2 3790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* added before shift and offset 3800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 3810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] ht 3820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* height of the source 3830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 3840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] wd 3850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* width of the source 3860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 3870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @returns 
3880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 3890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @remarks 3900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* None 3910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 3920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar******************************************************************************* 3930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar*/ 3940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 3950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarvoid ihevc_weighted_pred_bi_neonintr(WORD16 *pi2_src1, 3960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD16 *pi2_src2, 3970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar UWORD8 *pu1_dst, 3980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 src_strd1, 3990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 src_strd2, 4000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 dst_strd, 4010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 wgt0, 4020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 off0, 4030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 wgt1, 4040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 off1, 4050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 shift, 4060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 lvl_shift1, 4070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 lvl_shift2, 4080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 ht, 4090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 wd) 4100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar{ 4110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 row, col; 4120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar int16x4_t pi2_src1_val1; 
4130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar int16x4_t pi2_src1_val2; 4140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar int16x4_t pi2_src2_val1; 4150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar int16x4_t pi2_src2_val2; 4160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar int32x4_t i4_tmp1_t1; 4170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar int32x4_t i4_tmp1_t2; 4180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar int32x4_t i4_tmp2_t1; 4190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar int32x4_t i4_tmp2_t2; 4200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar int32x4_t sto_res_tmp1; 4210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar uint16x4_t sto_res_tmp2; 4220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar uint16x8_t sto_res_tmp3; 4230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar uint8x8_t sto_res; 4240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar int32x4_t tmp_lvl_shift_t; 4250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 tmp_shift = 0 - shift; 4260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar int32x4_t tmp_shift_t; 4270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD16 *pi2_src_tmp1; 4280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD16 *pi2_src_tmp2; 4290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar UWORD8 *pu1_dst_tmp; 4300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 tmp_lvl_shift = (lvl_shift1 * wgt0) + (lvl_shift2 * wgt1); 4320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar tmp_lvl_shift += ((off0 + off1 + 1) << (shift - 1)); 4330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar tmp_lvl_shift_t = vmovq_n_s32(tmp_lvl_shift); 4340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar tmp_shift_t = vmovq_n_s32(tmp_shift); 
4350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* Used i4_tmp1_t & i4_tmp1_t to process 2 rows at a time. */ 4370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* height has also been unrolled, hence 2 rows will processed at a time */ 4380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* store also has been taken care for two row process */ 4390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* vcombine_u16 has been used since after narrowing we get 16x4 value which can't be */ 4400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* saturated and narrowed */ 4410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar for(row = ht; row > 0; row -= 2) 4430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 4440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar for(col = wd; col > 0; col -= 4) 4450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 4460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pi2_src_tmp1 = pi2_src1 + src_strd1; 4470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pi2_src_tmp2 = pi2_src2 + src_strd2; 4480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pi2_src1_val1 = vld1_s16((int16_t *)pi2_src1); 4500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pi2_src1 += 4; 4510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pu1_dst_tmp = pu1_dst + dst_strd; 4520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pi2_src2_val1 = vld1_s16((int16_t *)pi2_src2); 4540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pi2_src2 += 4; 4550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar i4_tmp1_t1 = vmull_n_s16(pi2_src1_val1, (int16_t)wgt0); 
4560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pi2_src1_val2 = vld1_s16((int16_t *)pi2_src_tmp1); 4580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar i4_tmp1_t2 = vmull_n_s16(pi2_src2_val1, (int16_t)wgt1); 4590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pi2_src2_val2 = vld1_s16((int16_t *)pi2_src_tmp2); 4610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar i4_tmp1_t1 = vaddq_s32(i4_tmp1_t1, i4_tmp1_t2); 4620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar i4_tmp2_t1 = vmull_n_s16(pi2_src1_val2, (int16_t)wgt0); 4640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar i4_tmp1_t1 = vaddq_s32(i4_tmp1_t1, tmp_lvl_shift_t); 4650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar i4_tmp2_t2 = vmull_n_s16(pi2_src2_val2, (int16_t)wgt1); 4670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar sto_res_tmp1 = vshlq_s32(i4_tmp1_t1, tmp_shift_t); 4680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar i4_tmp2_t1 = vaddq_s32(i4_tmp2_t1, i4_tmp2_t2); 4700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar sto_res_tmp2 = vqmovun_s32(sto_res_tmp1); 4710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar i4_tmp2_t1 = vaddq_s32(i4_tmp2_t1, tmp_lvl_shift_t); 4730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar sto_res_tmp3 = vcombine_u16(sto_res_tmp2, sto_res_tmp2); 4740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar sto_res_tmp1 = vshlq_s32(i4_tmp2_t1, tmp_shift_t); 4760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar sto_res = 
vqmovn_u16(sto_res_tmp3); 4770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar sto_res_tmp2 = vqmovun_s32(sto_res_tmp1); 4790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar sto_res_tmp3 = vcombine_u16(sto_res_tmp2, sto_res_tmp2); 4800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vst1_lane_u32((uint32_t *)pu1_dst, vreinterpret_u32_u8(sto_res), 0); 4820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pu1_dst += 4; 4830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar sto_res = vqmovn_u16(sto_res_tmp3); 4850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vst1_lane_u32((uint32_t *)pu1_dst_tmp, vreinterpret_u32_u8(sto_res), 0); 4860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 4870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pi2_src1 += 2 * src_strd1 - wd; 4880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pi2_src2 += 2 * src_strd2 - wd; 4890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pu1_dst += 2 * dst_strd - wd; 4900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 4910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar} 4920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//WEIGHTED_PRED_BI 4930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 4940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar/** 4950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar******************************************************************************* 4960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 4970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @brief 4980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* Chroma bi-weighted prediction on the arrays pointed by pi2_src1 and 
4990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* pi2_src2 and stores it at location pointed by pi2_dst Assumptions : The 5000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* function is optimized considering the fact Width and height are multiple 5010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* of 2. 5020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 5030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @par Description: 5040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* dst = ( (src1 + lvl_shift1)*wgt0 + (src2 + lvl_shift2)*wgt1 + (off0 + 5050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* off1 + 1) << (shift - 1) ) >> shift 5060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 5070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] pi2_src1 5080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* Pointer to source 1 5090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 5100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] pi2_src2 5110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* Pointer to source 2 5120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 5130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[out] pu1_dst 5140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* Pointer to destination 5150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 5160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] src_strd1 5170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* Source stride 1 5180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 5190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] src_strd2 5200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* Source stride 2 5210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 5220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] 
dst_strd 5230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* Destination stride 5240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 5250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] wgt0 5260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* weight to be multiplied to source 1 5270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 5280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] off0 5290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* offset 0 5300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 5310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] wgt1 5320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* weight to be multiplied to source 2 5330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 5340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] off1 5350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* offset 1 5360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 5370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] shift 5380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* (14 Bit depth) + log2_weight_denominator 5390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 5400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] lvl_shift1 5410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* added before shift and offset 5420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 5430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] lvl_shift2 5440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* added before shift and offset 5450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 5460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] ht 5470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* height of the source 
5480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 5490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] wd 5500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* width of the source 5510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 5520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @returns 5530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 5540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @remarks 5550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* None 5560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 5570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar******************************************************************************* 5580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar*/ 5590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 5600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarvoid ihevc_weighted_pred_chroma_bi_neonintr(WORD16 *pi2_src1, 5610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD16 *pi2_src2, 5620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar UWORD8 *pu1_dst, 5630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 src_strd1, 5640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 src_strd2, 5650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 dst_strd, 5660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 wgt0_cb, 5670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 wgt0_cr, 5680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 off0_cb, 5690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 off0_cr, 5700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 wgt1_cb, 5710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 wgt1_cr, 5720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 off1_cb, 
5730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 off1_cr, 5740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 shift, 5750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 lvl_shift1, 5760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 lvl_shift2, 5770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 ht, 5780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 wd) 5790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar{ 5800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 row, col; 5810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar int16x4_t pi2_src1_val1; 5820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar int16x4_t pi2_src1_val2; 5830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar int16x4_t pi2_src2_val1; 5840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar int16x4_t pi2_src2_val2; 5850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar int32x4_t i4_tmp1_t1; 5860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar int32x4_t i4_tmp1_t2; 5870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar int32x4_t i4_tmp2_t1; 5880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar int32x4_t i4_tmp2_t2; 5890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar int32x4_t sto_res_tmp1; 5900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar uint16x4_t sto_res_tmp2; 5910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar uint16x8_t sto_res_tmp3; 5920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar uint8x8_t sto_res; 5930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar int32x4_t tmp_lvl_shift_t_u, tmp_lvl_shift_t_v; 5940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar int32x4x2_t tmp_lvl_shift_t; 5950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 tmp_shift = 0 - shift; 5960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar int32x4_t 
tmp_shift_t; 5970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar int16x4_t tmp_wgt0_u, tmp_wgt0_v, tmp_wgt1_u, tmp_wgt1_v; 5980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar int16x4x2_t wgt0, wgt1; 5990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD16 *pi2_src_tmp1; 6000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD16 *pi2_src_tmp2; 6010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar UWORD8 *pu1_dst_tmp; 6020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 tmp_lvl_shift = (lvl_shift1 * wgt0_cb) + (lvl_shift2 * wgt1_cb); 6040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar tmp_lvl_shift += ((off0_cb + off1_cb + 1) << (shift - 1)); 6050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar tmp_lvl_shift_t_u = vmovq_n_s32(tmp_lvl_shift); 6060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar tmp_lvl_shift = (lvl_shift1 * wgt0_cr) + (lvl_shift2 * wgt1_cr); 6080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar tmp_lvl_shift += ((off0_cr + off1_cr + 1) << (shift - 1)); 6090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar tmp_lvl_shift_t_v = vmovq_n_s32(tmp_lvl_shift); 6100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar tmp_lvl_shift_t = vzipq_s32(tmp_lvl_shift_t_u, tmp_lvl_shift_t_v); 6120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar tmp_shift_t = vmovq_n_s32(tmp_shift); 6140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar tmp_wgt0_u = vdup_n_s16(wgt0_cb); 6160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar tmp_wgt0_v = vdup_n_s16(wgt0_cr); 6170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar wgt0 = 
vzip_s16(tmp_wgt0_u, tmp_wgt0_v); 6180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar tmp_wgt1_u = vdup_n_s16(wgt1_cb); 6190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar tmp_wgt1_v = vdup_n_s16(wgt1_cr); 6200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar wgt1 = vzip_s16(tmp_wgt1_u, tmp_wgt1_v); 6210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* Used i4_tmp1_t & i4_tmp1_t to process 2 rows at a time. */ 6230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* height has also been unrolled, hence 2 rows will processed at a time */ 6240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* store also has been taken care for two row process */ 6250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* vcombine_u16 has been used since after narrowing we get 16x4 value which can't be */ 6260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* saturated and narrowed */ 6270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar for(row = ht; row > 0; row -= 2) 6290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 6300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar for(col = 2 * wd; col > 0; col -= 4) 6310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 6320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pi2_src_tmp1 = pi2_src1 + src_strd1; 6330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pi2_src_tmp2 = pi2_src2 + src_strd2; 6340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pi2_src1_val1 = vld1_s16((int16_t *)pi2_src1); 6360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pi2_src1 += 4; 6370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pu1_dst_tmp = pu1_dst + dst_strd; 6380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 
6390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pi2_src2_val1 = vld1_s16((int16_t *)pi2_src2); 6400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pi2_src2 += 4; 6410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar i4_tmp1_t1 = vmull_s16(pi2_src1_val1, wgt0.val[0]); 6420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pi2_src1_val2 = vld1_s16((int16_t *)pi2_src_tmp1); 6440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar i4_tmp1_t2 = vmull_s16(pi2_src2_val1, wgt1.val[0]); 6450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pi2_src2_val2 = vld1_s16((int16_t *)pi2_src_tmp2); 6470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar i4_tmp1_t1 = vaddq_s32(i4_tmp1_t1, i4_tmp1_t2); 6480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar i4_tmp2_t1 = vmull_s16(pi2_src1_val2, wgt0.val[0]); 6500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar i4_tmp1_t1 = vaddq_s32(i4_tmp1_t1, tmp_lvl_shift_t.val[0]); 6510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar i4_tmp2_t2 = vmull_s16(pi2_src2_val2, wgt1.val[0]); 6530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar sto_res_tmp1 = vshlq_s32(i4_tmp1_t1, tmp_shift_t); 6540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar i4_tmp2_t1 = vaddq_s32(i4_tmp2_t1, i4_tmp2_t2); 6560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar sto_res_tmp2 = vqmovun_s32(sto_res_tmp1); 6570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar i4_tmp2_t1 = vaddq_s32(i4_tmp2_t1, tmp_lvl_shift_t.val[0]); 6590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar sto_res_tmp3 = 
vcombine_u16(sto_res_tmp2, sto_res_tmp2); 6600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar sto_res_tmp1 = vshlq_s32(i4_tmp2_t1, tmp_shift_t); 6620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar sto_res = vqmovn_u16(sto_res_tmp3); 6630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar sto_res_tmp2 = vqmovun_s32(sto_res_tmp1); 6650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar sto_res_tmp3 = vcombine_u16(sto_res_tmp2, sto_res_tmp2); 6660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vst1_lane_u32((uint32_t *)pu1_dst, vreinterpret_u32_u8(sto_res), 0); 6680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pu1_dst += 4; 6690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar sto_res = vqmovn_u16(sto_res_tmp3); 6710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vst1_lane_u32((uint32_t *)pu1_dst_tmp, vreinterpret_u32_u8(sto_res), 0); 6720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 6730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pi2_src1 += 2 * src_strd1 - 2 * wd; 6740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pi2_src2 += 2 * src_strd2 - 2 * wd; 6750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pu1_dst += 2 * dst_strd - 2 * wd; 6760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 6770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar} 6780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//WEIGHTED_PRED_CHROMA_BI 6790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 6800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar/** 6810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish 
Mahendrakar******************************************************************************* 6820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 6830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @brief 6840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* Does default bi-weighted prediction on the arrays pointed by pi2_src1 and 6850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* pi2_src2 and stores it at location pointed by pi2_dst Assumptions : The 6860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* function is optimized considering the fact Width and height are multiple 6870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* of 2. 6880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 6890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @par Description: 6900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* dst = ( (src1 + lvl_shift1) + (src2 + lvl_shift2) + 1 << (shift - 1) ) 6910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* >> shift where shift = 15 - BitDepth 6920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 6930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] pi2_src1 6940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* Pointer to source 1 6950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 6960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] pi2_src2 6970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* Pointer to source 2 6980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 6990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[out] pu1_dst 7000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* Pointer to destination 7010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 7020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] src_strd1 7030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* Source stride 1 
7040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 7050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] src_strd2 7060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* Source stride 2 7070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 7080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] dst_strd 7090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* Destination stride 7100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 7110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] lvl_shift1 7120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* added before shift and offset 7130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 7140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] lvl_shift2 7150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* added before shift and offset 7160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 7170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] ht 7180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* height of the source 7190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 7200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] wd 7210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* width of the source 7220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 7230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @returns 7240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 7250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @remarks 7260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* None 7270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 7280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar******************************************************************************* 7290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar*/ 
7300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarvoid ihevc_weighted_pred_bi_default_neonintr(WORD16 *pi2_src1, 7320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD16 *pi2_src2, 7330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar UWORD8 *pu1_dst, 7340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 src_strd1, 7350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 src_strd2, 7360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 dst_strd, 7370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 lvl_shift1, 7380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 lvl_shift2, 7390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 ht, 7400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 wd) 7410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar{ 7420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 row, col; 7430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar int16x4_t pi2_src1_val1; 7440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar int16x4_t pi2_src1_val2; 7450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar int16x4_t pi2_src2_val1; 7460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar int16x4_t pi2_src2_val2; 7470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar int32x4_t i4_tmp1_t1; 7480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar int32x4_t i4_tmp1_t2; 7490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar int32x4_t i4_tmp2_t1; 7500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar int32x4_t i4_tmp2_t2; 7510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar int32x4_t sto_res_tmp1; 7520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar uint16x4_t sto_res_tmp2; 7530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar uint16x8_t sto_res_tmp3; 
7540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar uint8x8_t sto_res; 7550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar int32x4_t tmp_lvl_shift_t; 7560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar int32x4_t tmp_shift_t; 7570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD16 *pi2_src_tmp1; 7580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD16 *pi2_src_tmp2; 7590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar UWORD8 *pu1_dst_tmp; 7600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 shift; 7610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar shift = SHIFT_14_MINUS_BIT_DEPTH + 1; 7630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 tmp_shift = 0 - shift; 7640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 tmp_lvl_shift = 1 << (shift - 1); 7650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar tmp_lvl_shift_t = vmovq_n_s32(tmp_lvl_shift); 7660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar tmp_shift_t = vmovq_n_s32(tmp_shift); 7670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar int16x4_t lvl_shift1_t = vmov_n_s16((int16_t)lvl_shift1); 7690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar int16x4_t lvl_shift2_t = vmov_n_s16((int16_t)lvl_shift2); 7700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* Used i4_tmp1_t & i4_tmp1_t to process 2 rows at a time. 
*/ 7720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* height has also been unrolled, hence 2 rows will processed at a time */ 7730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* store also has been taken care for two row process */ 7740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* vcombine_u16 has been used since after narrowing we get 16x4 value which can't be */ 7750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* saturated and narrowed */ 7760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar for(row = ht; row > 0; row -= 2) 7780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 7790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar for(col = wd; col > 0; col -= 4) 7800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 7810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pi2_src_tmp1 = pi2_src1 + src_strd1; 7820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pi2_src_tmp2 = pi2_src2 + src_strd2; 7830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pi2_src1_val1 = vld1_s16((int16_t *)pi2_src1); 7850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pi2_src1 += 4; 7860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pu1_dst_tmp = pu1_dst + dst_strd; 7870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pi2_src2_val1 = vld1_s16((int16_t *)pi2_src2); 7890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pi2_src2 += 4; 7900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar i4_tmp1_t1 = vaddl_s16(pi2_src1_val1, lvl_shift1_t); 7910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pi2_src1_val2 = vld1_s16((int16_t *)pi2_src_tmp1); 
7930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar i4_tmp1_t2 = vaddl_s16(pi2_src2_val1, lvl_shift2_t); 7940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pi2_src2_val2 = vld1_s16((int16_t *)pi2_src_tmp2); 7960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar i4_tmp1_t1 = vaddq_s32(i4_tmp1_t1, i4_tmp1_t2); 7970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 7980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar i4_tmp2_t1 = vaddl_s16(pi2_src1_val2, lvl_shift1_t); 7990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar i4_tmp1_t1 = vaddq_s32(i4_tmp1_t1, tmp_lvl_shift_t); 8000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar i4_tmp2_t2 = vaddl_s16(pi2_src2_val2, lvl_shift2_t); 8020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar sto_res_tmp1 = vshlq_s32(i4_tmp1_t1, tmp_shift_t); 8030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar i4_tmp2_t1 = vaddq_s32(i4_tmp2_t1, i4_tmp2_t2); 8050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar sto_res_tmp2 = vqmovun_s32(sto_res_tmp1); 8060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar i4_tmp2_t1 = vaddq_s32(i4_tmp2_t1, tmp_lvl_shift_t); 8080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar sto_res_tmp3 = vcombine_u16(sto_res_tmp2, sto_res_tmp2); 8090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar sto_res_tmp1 = vshlq_s32(i4_tmp2_t1, tmp_shift_t); 8110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar sto_res = vqmovn_u16(sto_res_tmp3); 8120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar sto_res_tmp2 = vqmovun_s32(sto_res_tmp1); 
8140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar sto_res_tmp3 = vcombine_u16(sto_res_tmp2, sto_res_tmp2); 8150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vst1_lane_u32((uint32_t *)pu1_dst, vreinterpret_u32_u8(sto_res), 0); 8170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pu1_dst += 4; 8180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar sto_res = vqmovn_u16(sto_res_tmp3); 8200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vst1_lane_u32((uint32_t *)pu1_dst_tmp, vreinterpret_u32_u8(sto_res), 0); 8210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 8220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pi2_src1 += 2 * src_strd1 - wd; 8230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pi2_src2 += 2 * src_strd2 - wd; 8240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pu1_dst += 2 * dst_strd - wd; 8250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 8260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar} 8270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//WEIGHTED_PRED_BI_DEFAULT 8280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar/** 8300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar******************************************************************************* 8310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 8320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @brief 8330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* Does default bi-weighted prediction on the arrays pointed by pi2_src1 and 8340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* pi2_src2 and stores it at location pointed by pi2_dst Assumptions : The 8350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* function is optimized 
considering the fact Width and height are multiple 8360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* of 2. 8370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 8380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @par Description: 8390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* dst = ( (src1 + lvl_shift1) + (src2 + lvl_shift2) + 1 << (shift - 1) ) 8400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* >> shift where shift = 15 - BitDepth 8410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 8420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] pi2_src1 8430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* Pointer to source 1 8440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 8450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] pi2_src2 8460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* Pointer to source 2 8470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 8480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[out] pu1_dst 8490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* Pointer to destination 8500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 8510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] src_strd1 8520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* Source stride 1 8530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 8540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] src_strd2 8550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* Source stride 2 8560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 8570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] dst_strd 8580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* Destination stride 8590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 8600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 
@param[in] lvl_shift1 8610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* added before shift and offset 8620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 8630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] lvl_shift2 8640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* added before shift and offset 8650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 8660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] ht 8670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* height of the source 8680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 8690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @param[in] wd 8700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* width of the source 8710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 8720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @returns 8730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 8740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* @remarks 8750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* None 8760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar* 8770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar******************************************************************************* 8780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar*/ 8790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 8800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarvoid ihevc_weighted_pred_chroma_bi_default_neonintr(WORD16 *pi2_src1, 8810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD16 *pi2_src2, 8820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar UWORD8 *pu1_dst, 8830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 src_strd1, 8840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 src_strd2, 8850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 dst_strd, 
8860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 lvl_shift1, 8870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 lvl_shift2, 8880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 ht, 8890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 wd) 8900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar{ 8910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 row, col; 8920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar int16x4_t pi2_src1_val1; 8930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar int16x4_t pi2_src1_val2; 8940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar int16x4_t pi2_src2_val1; 8950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar int16x4_t pi2_src2_val2; 8960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar int32x4_t i4_tmp1_t1; 8970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar int32x4_t i4_tmp1_t2; 8980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar int32x4_t i4_tmp2_t1; 8990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar int32x4_t i4_tmp2_t2; 9000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar int32x4_t sto_res_tmp1; 9010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar uint16x4_t sto_res_tmp2; 9020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar uint16x8_t sto_res_tmp3; 9030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar uint8x8_t sto_res; 9040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar int32x4_t tmp_lvl_shift_t; 9050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar int32x4_t tmp_shift_t; 9060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD16 *pi2_src_tmp1; 9070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD16 *pi2_src_tmp2; 9080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar UWORD8 *pu1_dst_tmp; 9090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 shift; 
9100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 tmp_shift; 9110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar WORD32 tmp_lvl_shift; 9120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar int16x4_t lvl_shift1_t; 9130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar int16x4_t lvl_shift2_t; 9140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar shift = SHIFT_14_MINUS_BIT_DEPTH + 1; 9150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar tmp_shift = 0 - shift; 9160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar tmp_lvl_shift = 1 << (shift - 1); 9170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar tmp_lvl_shift_t = vmovq_n_s32(tmp_lvl_shift); 9180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar tmp_shift_t = vmovq_n_s32(tmp_shift); 9190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 9200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar lvl_shift1_t = vmov_n_s16((int16_t)lvl_shift1); 9210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar lvl_shift2_t = vmov_n_s16((int16_t)lvl_shift2); 9220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 9230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* Used i4_tmp1_t & i4_tmp1_t to process 2 rows at a time. 
*/ 9240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* height has also been unrolled, hence 2 rows will processed at a time */ 9250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* store also has been taken care for two row process */ 9260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* vcombine_u16 has been used since after narrowing we get 16x4 value which can't be */ 9270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar /* saturated and narrowed */ 9280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 9290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar for(row = ht; row > 0; row -= 2) 9300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 9310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar for(col = 2 * wd; col > 0; col -= 4) 9320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar { 9330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pi2_src_tmp1 = pi2_src1 + src_strd1; 9340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pi2_src_tmp2 = pi2_src2 + src_strd2; 9350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 9360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pi2_src1_val1 = vld1_s16((int16_t *)pi2_src1); 9370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pi2_src1 += 4; 9380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pu1_dst_tmp = pu1_dst + dst_strd; 9390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 9400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pi2_src2_val1 = vld1_s16((int16_t *)pi2_src2); 9410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pi2_src2 += 4; 9420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar i4_tmp1_t1 = vaddl_s16(pi2_src1_val1, lvl_shift1_t); 9430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 9440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pi2_src1_val2 = vld1_s16((int16_t *)pi2_src_tmp1); 
9450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar i4_tmp1_t2 = vaddl_s16(pi2_src2_val1, lvl_shift2_t); 9460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 9470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pi2_src2_val2 = vld1_s16((int16_t *)pi2_src_tmp2); 9480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar i4_tmp1_t1 = vaddq_s32(i4_tmp1_t1, i4_tmp1_t2); 9490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 9500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar i4_tmp2_t1 = vaddl_s16(pi2_src1_val2, lvl_shift1_t); 9510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar i4_tmp1_t1 = vaddq_s32(i4_tmp1_t1, tmp_lvl_shift_t); 9520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 9530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar i4_tmp2_t2 = vaddl_s16(pi2_src2_val2, lvl_shift2_t); 9540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar sto_res_tmp1 = vshlq_s32(i4_tmp1_t1, tmp_shift_t); 9550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 9560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar i4_tmp2_t1 = vaddq_s32(i4_tmp2_t1, i4_tmp2_t2); 9570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar sto_res_tmp2 = vqmovun_s32(sto_res_tmp1); 9580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 9590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar i4_tmp2_t1 = vaddq_s32(i4_tmp2_t1, tmp_lvl_shift_t); 9600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar sto_res_tmp3 = vcombine_u16(sto_res_tmp2, sto_res_tmp2); 9610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 9620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar sto_res_tmp1 = vshlq_s32(i4_tmp2_t1, tmp_shift_t); 9630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar sto_res = vqmovn_u16(sto_res_tmp3); 9640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 9650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar sto_res_tmp2 = vqmovun_s32(sto_res_tmp1); 
9660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar sto_res_tmp3 = vcombine_u16(sto_res_tmp2, sto_res_tmp2); 9670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 9680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vst1_lane_u32((uint32_t *)pu1_dst, vreinterpret_u32_u8(sto_res), 0); 9690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pu1_dst += 4; 9700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar 9710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar sto_res = vqmovn_u16(sto_res_tmp3); 9720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar vst1_lane_u32((uint32_t *)pu1_dst_tmp, vreinterpret_u32_u8(sto_res), 0); 9730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 9740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pi2_src1 += 2 * src_strd1 - 2 * wd; 9750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pi2_src2 += 2 * src_strd2 - 2 * wd; 9760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar pu1_dst += 2 * dst_strd - 2 * wd; 9770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar } 9780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar} 9790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar//WEIGHTED_PRED_CHROMA_BI_DEFAULT 980