/******************************************************************************
 *
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *****************************************************************************
 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
/**
 *******************************************************************************
 * @file
 * ih264_luma_intra_pred_filters_ssse3.c
 *
 * @brief
 * Contains function definitions for luma intra prediction filters in x86
 * intrinsics
 *
 * @author
 * Ittiam
 *
 * @par List of Functions:
 * - ih264_intra_pred_luma_4x4_mode_vert_ssse3
 * - ih264_intra_pred_luma_4x4_mode_horz_ssse3
 * - ih264_intra_pred_luma_4x4_mode_dc_ssse3
 * - ih264_intra_pred_luma_4x4_mode_diag_dl_ssse3
 * - ih264_intra_pred_luma_4x4_mode_diag_dr_ssse3
 * - ih264_intra_pred_luma_4x4_mode_vert_r_ssse3
 * - ih264_intra_pred_luma_4x4_mode_horz_d_ssse3
 * - ih264_intra_pred_luma_4x4_mode_vert_l_ssse3
 * - ih264_intra_pred_luma_4x4_mode_horz_u_ssse3
 * - ih264_intra_pred_luma_8x8_mode_vert_ssse3
 * - ih264_intra_pred_luma_8x8_mode_horz_ssse3
 * - ih264_intra_pred_luma_8x8_mode_dc_ssse3
 * - ih264_intra_pred_luma_8x8_mode_diag_dl_ssse3
 * - ih264_intra_pred_luma_8x8_mode_diag_dr_ssse3
 * - ih264_intra_pred_luma_8x8_mode_vert_r_ssse3
 * - ih264_intra_pred_luma_8x8_mode_horz_d_ssse3
 * - ih264_intra_pred_luma_8x8_mode_vert_l_ssse3
 * - ih264_intra_pred_luma_8x8_mode_horz_u_ssse3
 * - ih264_intra_pred_luma_16x16_mode_vert_ssse3
 * - ih264_intra_pred_luma_16x16_mode_horz_ssse3
 * - ih264_intra_pred_luma_16x16_mode_dc_ssse3
 * - ih264_intra_pred_luma_16x16_mode_plane_ssse3
 *
 * @remarks
 * None
 *
 ******************************************************************************
 */

/*****************************************************************************/
638d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S/* File Includes */ 648d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S/*****************************************************************************/ 658d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S/* System include files */ 668d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S#include <stdio.h> 678d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S#include <stddef.h> 688d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S#include <string.h> 698d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S#include <immintrin.h> 708d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 718d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S/* User include files */ 728d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S#include "ih264_defs.h" 738d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S#include "ih264_typedefs.h" 748d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S#include "ih264_macros.h" 758d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S#include "ih264_platform_macros.h" 768d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S#include "ih264_intra_pred_filters.h" 778d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 788d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 798d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 808d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S/******************* LUMA INTRAPREDICTION *******************/ 818d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 828d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S/******************* 4x4 Modes *******************/ 838d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 848d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S/** 858d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S ******************************************************************************* 868d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 878d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * ih264_intra_pred_luma_4x4_mode_vert_ssse3 
888d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 898d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @brief 908d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * Perform Intra prediction for luma_4x4 mode:vertical 918d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 928d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @par Description: 938d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * Perform Intra prediction for luma_4x4 mode:vertical ,described in sec 8.3.1.2.1 948d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 958d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] pu1_src 968d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * UWORD8 pointer to the source 978d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 988d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[out] pu1_dst 998d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * UWORD8 pointer to the destination 1008d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 1018d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] src_strd 1028d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * integer source stride 1038d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 1048d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] dst_strd 1058d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * integer destination stride 1068d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 1078d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] ngbr_avail 1088d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * availability of neighbouring pixels(Not used in this function) 1098d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 1108d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @returns 1118d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 1128d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @remarks 1138d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * None 
1148d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 1158d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S ******************************************************************************* 1168d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S */ 1178d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha Svoid ih264_intra_pred_luma_4x4_mode_vert_ssse3(UWORD8 *pu1_src, 1188d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S UWORD8 *pu1_dst, 1198d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S WORD32 src_strd, 1208d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S WORD32 dst_strd, 1218d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S WORD32 ngbr_avail) 1228d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S{ 1238d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S UWORD8 *pu1_top; 1248d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S WORD32 dst_strd2, dst_strd3; 125796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy WORD32 i4_top; 1268d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 1278d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S UNUSED(src_strd); 1288d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S UNUSED(ngbr_avail); 1298d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 1308d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S pu1_top = pu1_src + BLK_SIZE + 1; 1318d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 132796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy i4_top = *((WORD32 *)pu1_top); 1338d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 1348d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S dst_strd2 = dst_strd << 1; 1358d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S dst_strd3 = dst_strd + dst_strd2; 1368d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 137796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy *((WORD32 *)(pu1_dst)) = i4_top; 138796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy *((WORD32 *)(pu1_dst + dst_strd)) = i4_top; 
139796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy *((WORD32 *)(pu1_dst + dst_strd2)) = i4_top; 140796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy *((WORD32 *)(pu1_dst + dst_strd3)) = i4_top; 1418d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S} 1428d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 1438d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S/** 1448d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S ******************************************************************************* 1458d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 1468d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *ih264_intra_pred_luma_4x4_mode_horz_ssse3 1478d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 1488d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @brief 1498d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * Perform Intra prediction for luma_4x4 mode:horizontal 1508d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 1518d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @par Description: 1528d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * Perform Intra prediction for luma_4x4 mode:horizontal ,described in sec 8.3.1.2.2 1538d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 1548d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] pu1_src 1558d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * UWORD8 pointer to the source 1568d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 1578d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[out] pu1_dst 1588d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * UWORD8 pointer to the destination 1598d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 1608d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] src_strd 1618d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * integer source stride 1628d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 1638d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] dst_strd 
1648d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * integer destination stride 1658d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 1668d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] ngbr_avail 1678d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * availability of neighbouring pixels(Not used in this function) 1688d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 1698d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @returns 1708d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 1718d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @remarks 1728d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * None 1738d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 1748d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S ******************************************************************************* 1758d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S */ 1768d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha Svoid ih264_intra_pred_luma_4x4_mode_horz_ssse3(UWORD8 *pu1_src, 1778d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S UWORD8 *pu1_dst, 1788d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S WORD32 src_strd, 1798d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S WORD32 dst_strd, 1808d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S WORD32 ngbr_avail) 1818d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S{ 182796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy UWORD8 *pu1_left = NULL; /* Pointer to start of left predictors */ 183796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy WORD32 row1,row2,row3,row4; 184796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy UWORD8 val; 1858d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S WORD32 dst_strd2, dst_strd3; 1868d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 1878d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S UNUSED(src_strd); 1888d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S UNUSED(ngbr_avail); 
1898d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S pu1_left = pu1_src + BLK_SIZE - 1; 1908d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 191796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy val = *pu1_left; 192796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy row1 = val + (val << 8) + (val << 16) + (val << 24); 193796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy val = *(pu1_left - 1); 194796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy row2 = val + (val << 8) + (val << 16) + (val << 24); 195796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy val = *(pu1_left - 2); 196796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy row3 = val + (val << 8) + (val << 16) + (val << 24); 197796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy val = *(pu1_left - 3); 198796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy row4 = val + (val << 8) + (val << 16) + (val << 24); 1998d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 2008d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S dst_strd2 = dst_strd << 1; 2018d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S dst_strd3 = dst_strd + dst_strd2; 2028d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 203796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy *((WORD32 *)(pu1_dst)) = row1; 204796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy *((WORD32 *)(pu1_dst + dst_strd)) = row2; 205796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy *((WORD32 *)(pu1_dst + dst_strd2)) = row3; 206796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy *((WORD32 *)(pu1_dst + dst_strd3)) = row4; 2078d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S} 2088d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 2098d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S/** 2108d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S ******************************************************************************* 
2118d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 2128d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * ih264_intra_pred_luma_4x4_mode_dc_ssse3 2138d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 2148d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @brief 2158d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * Perform Intra prediction for luma_4x4 mode:DC 2168d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 2178d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @par Description: 2188d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * Perform Intra prediction for luma_4x4 mode:DC ,described in sec 8.3.1.2.3 2198d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 2208d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] pu1_src 2218d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * UWORD8 pointer to the source 2228d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 2238d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[out] pu1_dst 2248d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * UWORD8 pointer to the destination 2258d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 2268d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] src_strd 2278d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * integer source stride 2288d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 2298d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] dst_strd 2308d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * integer destination stride 2318d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 2328d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] ngbr_avail 2338d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * availability of neighbouring pixels 2348d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 2358d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @returns 2368d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 
2378d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @remarks 2388d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * None 2398d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 2408d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *******************************************************************************/ 2418d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha Svoid ih264_intra_pred_luma_4x4_mode_dc_ssse3(UWORD8 *pu1_src, 2428d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S UWORD8 *pu1_dst, 2438d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S WORD32 src_strd, 2448d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S WORD32 dst_strd, 2458d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S WORD32 ngbr_avail) 2468d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S{ 2478d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S UWORD8 u1_useleft; /* availability of left predictors (only for DC) */ 248796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy UWORD8 u1_usetop; /* availability of top predictors (only for DC) */ 249796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy UWORD8 *pu1_left = NULL; /* Pointer to start of left predictors */ 250796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy UWORD8 *pu1_top = NULL; /* Pointer to start of top predictors */ 2518d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S WORD32 dst_strd2, dst_strd3; 252796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy WORD32 val = 0; 2538d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S UNUSED(src_strd); 2548d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S UNUSED(ngbr_avail); 2558d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S u1_useleft = BOOLEAN(ngbr_avail & LEFT_MB_AVAILABLE_MASK); 2568d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S u1_usetop = BOOLEAN(ngbr_avail & TOP_MB_AVAILABLE_MASK); 2578d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S pu1_top = pu1_src + BLK_SIZE + 1; 
258796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy pu1_left = pu1_src + BLK_SIZE - 1; 2598d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 260796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy if(u1_useleft) 2618d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S { 262796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy val += *pu1_left--; 263796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy val += *pu1_left--; 264796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy val += *pu1_left--; 265796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy val += *pu1_left + 2; 2668d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S } 267796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy if(u1_usetop) 268796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy { 269796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy val += *pu1_top + *(pu1_top + 1) + *(pu1_top + 2) + *(pu1_top + 3) 270796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy + 2; 271796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy } 272796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy /* Since 2 is added if either left/top pred is there, 273796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy val still being zero implies both preds are not there */ 274796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy val = (val) ? 
(val >> (1 + u1_useleft + u1_usetop)) : 128; 275796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy 276796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy val = val + (val << 8) + (val << 16) + (val << 24); 2778d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 2788d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S dst_strd2 = dst_strd << 1; 2798d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S dst_strd3 = dst_strd + dst_strd2; 2808d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 281796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy *((WORD32 *)(pu1_dst)) = val; 282796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy *((WORD32 *)(pu1_dst + dst_strd)) = val; 283796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy *((WORD32 *)(pu1_dst + dst_strd2)) = val; 284796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy *((WORD32 *)(pu1_dst + dst_strd3)) = val; 2858d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S} 2868d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 2878d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S/** 2888d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S ******************************************************************************* 2898d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 2908d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * ih264_intra_pred_luma_4x4_mode_diag_dl_ssse3 2918d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 2928d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @brief 2938d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * Perform Intra prediction for luma_4x4 mode:Diagonal_Down_Left 2948d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 2958d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @par Description: 2968d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * Perform Intra prediction for luma_4x4 mode:Diagonal_Down_Left ,described in sec 8.3.1.2.4 2978d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 
2988d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] pu1_src 2998d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * UWORD8 pointer to the source 3008d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 3018d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[out] pu1_dst 3028d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * UWORD8 pointer to the destination 3038d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 3048d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] src_strd 3058d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * integer source stride 3068d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 3078d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] dst_strd 3088d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * integer destination stride 3098d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 3108d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] ngbr_avail 3118d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * availability of neighbouring pixels(Not used in this function) 3128d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 3138d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @returns 3148d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 3158d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @remarks 3168d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * None 3178d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 3188d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *******************************************************************************/ 3198d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha Svoid ih264_intra_pred_luma_4x4_mode_diag_dl_ssse3(UWORD8 *pu1_src, 3208d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S UWORD8 *pu1_dst, 3218d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S WORD32 src_strd, 3228d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S WORD32 dst_strd, 3238d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S WORD32 
ngbr_avail) 3248d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S{ 3258d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S UWORD8 *pu1_top; 3268d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S WORD32 dst_strd2, dst_strd3; 3278d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 3288d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S __m128i top_16x8b, top_8x16b, top_sh_8x16b; 3298d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S __m128i res1_8x16b, res2_8x16b, res_16x8b; 3308d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S __m128i zero_vector, const_2_8x16b; 331796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy WORD32 row1,row2,row3,row4; 3328d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 3338d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S UNUSED(src_strd); 3348d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S UNUSED(ngbr_avail); 3358d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 3368d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S pu1_top = pu1_src + BLK_SIZE + 1; 3378d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 3388d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S top_16x8b = _mm_loadl_epi64((__m128i *)pu1_top); 3398d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S zero_vector = _mm_setzero_si128(); 3408d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S top_8x16b = _mm_unpacklo_epi8(top_16x8b, zero_vector); //t0 t1 t2 t3 t4 t5 t6 t7 3418d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 3428d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S top_sh_8x16b = _mm_srli_si128(top_8x16b, 2); //t1 t2 t3 t4 t5 t6 t7 0 3438d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S const_2_8x16b = _mm_set1_epi16(2); 3448d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 3458d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S top_sh_8x16b = _mm_shufflehi_epi16(top_sh_8x16b, 0xa4); //t1 t2 t3 t4 t5 t6 t7 t7 3468d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res1_8x16b = _mm_add_epi16(top_8x16b, top_sh_8x16b); 
3478d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res2_8x16b = _mm_srli_si128(res1_8x16b, 2); 3488d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 3498d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res1_8x16b = _mm_add_epi16(res1_8x16b, const_2_8x16b); 3508d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res1_8x16b = _mm_add_epi16(res2_8x16b, res1_8x16b); 3518d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res1_8x16b = _mm_srai_epi16(res1_8x16b, 2); 3528d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 3538d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S dst_strd2 = dst_strd << 1; 3548d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S dst_strd3 = dst_strd + dst_strd2; 3558d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 3568d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res_16x8b = _mm_packus_epi16(res1_8x16b, res1_8x16b); 357796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy row1 = _mm_cvtsi128_si32(res_16x8b); 3588d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res_16x8b = _mm_srli_si128(res_16x8b, 1); 359796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy row2 = _mm_cvtsi128_si32(res_16x8b); 3608d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res_16x8b = _mm_srli_si128(res_16x8b, 1); 361796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy row3 = _mm_cvtsi128_si32(res_16x8b); 3628d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res_16x8b = _mm_srli_si128(res_16x8b, 1); 363796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy row4 = _mm_cvtsi128_si32(res_16x8b); 364796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy 365796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy *((WORD32 *)(pu1_dst)) = row1; 366796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy *((WORD32 *)(pu1_dst + dst_strd)) = row2; 367796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy *((WORD32 *)(pu1_dst + dst_strd2)) = row3; 
368796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy *((WORD32 *)(pu1_dst + dst_strd3)) = row4; 3698d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S} 3708d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 3718d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S/** 3728d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S ******************************************************************************* 3738d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 3748d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * ih264_intra_pred_luma_4x4_mode_diag_dr_ssse3 3758d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 3768d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @brief 3778d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * Perform Intra prediction for luma_4x4 mode:Diagonal_Down_Right 3788d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 3798d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @par Description: 3808d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * Perform Intra prediction for luma_4x4 mode:Diagonal_Down_Right ,described in sec 8.3.1.2.5 3818d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 3828d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] pu1_src 3838d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * UWORD8 pointer to the source 3848d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 3858d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[out] pu1_dst 3868d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * UWORD8 pointer to the destination 3878d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 3888d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] src_strd 3898d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * integer source stride 3908d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 3918d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] dst_strd 3928d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * integer destination stride 
3938d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 3948d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] ngbr_avail 3958d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * availability of neighbouring pixels(Not used in this function) 3968d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 3978d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @returns 3988d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 3998d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @remarks 4008d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * None 4018d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 4028d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *******************************************************************************/ 4038d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha Svoid ih264_intra_pred_luma_4x4_mode_diag_dr_ssse3(UWORD8 *pu1_src, 4048d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S UWORD8 *pu1_dst, 4058d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S WORD32 src_strd, 4068d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S WORD32 dst_strd, 4078d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S WORD32 ngbr_avail) 4088d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S{ 4098d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S UWORD8 *pu1_left; 4108d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S WORD32 dst_strd2, dst_strd3; 4118d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 4128d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S __m128i top_left_16x8b, top_left_8x16b; 4138d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S __m128i top_left_sh_16x8b, top_left_sh_8x16b; 4148d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S __m128i res1_8x16b, res2_8x16b; 4158d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S __m128i res1_16x8b, res2_16x8b; 4168d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S __m128i zero_vector, const_2_8x16b; 417796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy WORD32 
row1,row2,row3,row4; 4188d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 4198d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S UNUSED(src_strd); 4208d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S UNUSED(ngbr_avail); 4218d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 4228d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S pu1_left = pu1_src + BLK_SIZE - 1; 4238d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 4248d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S top_left_16x8b = _mm_loadu_si128((__m128i *)(pu1_left - 3)); //l3 l2 l1 l0 tl t0 t1 t2... 4258d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S zero_vector = _mm_setzero_si128(); 4268d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S top_left_sh_16x8b = _mm_srli_si128(top_left_16x8b, 1); //l2 l1 l0 tl t0 t1 t2 t3... 4278d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 4288d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S top_left_8x16b = _mm_unpacklo_epi8(top_left_16x8b, zero_vector); 4298d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S top_left_sh_8x16b = _mm_unpacklo_epi8(top_left_sh_16x8b, zero_vector); 4308d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 4318d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res1_8x16b = _mm_add_epi16(top_left_8x16b, top_left_sh_8x16b); //l3+l2 l2+l1 l1+l0 l0+tl tl+t0 t0+t1 t1+t2 t2+t3... 4328d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S const_2_8x16b = _mm_set1_epi16(2); 4338d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res2_8x16b = _mm_srli_si128(res1_8x16b, 2); //l2+l1 l1+l0 l0+tl tl+t0 t0+t1 t1+t2 t2+t3... 4348d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 4358d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res1_8x16b = _mm_add_epi16(res1_8x16b, const_2_8x16b); 4368d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res1_8x16b = _mm_add_epi16(res2_8x16b, res1_8x16b); //l3+2*l2+l1+2 l2+2*l1+l0+2... 
4378d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res1_8x16b = _mm_srai_epi16(res1_8x16b, 2); 4388d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res1_16x8b = _mm_packus_epi16(res1_8x16b, res1_8x16b); 4398d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 4408d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S dst_strd2 = dst_strd << 1; 4418d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S dst_strd3 = dst_strd + dst_strd2; 4428d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 4438d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res2_16x8b = _mm_srli_si128(res1_16x8b, 3); 444796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy 445796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy row1 = _mm_cvtsi128_si32(res2_16x8b); 4468d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res2_16x8b = _mm_srli_si128(res1_16x8b, 2); 447796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy row2 = _mm_cvtsi128_si32(res2_16x8b); 4488d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res2_16x8b = _mm_srli_si128(res1_16x8b, 1); 449796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy row3 = _mm_cvtsi128_si32(res2_16x8b); 450796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy row4 = _mm_cvtsi128_si32(res1_16x8b); 451796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy 452796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy *((WORD32 *)(pu1_dst)) = row1; 453796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy *((WORD32 *)(pu1_dst + dst_strd)) = row2; 454796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy *((WORD32 *)(pu1_dst + dst_strd2)) = row3; 455796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy *((WORD32 *)(pu1_dst + dst_strd3)) = row4; 4568d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S} 4578d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 4588d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S/** 4598d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 
******************************************************************************* 4608d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 4618d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * ih264_intra_pred_luma_4x4_mode_vert_r_ssse3 4628d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 4638d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @brief 4648d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * Perform Intra prediction for luma_4x4 mode:Vertical_Right 4658d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 4668d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @par Description: 4678d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * Perform Intra prediction for luma_4x4 mode:Vertical_Right ,described in sec 8.3.1.2.6 4688d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 4698d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] pu1_src 4708d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * UWORD8 pointer to the source 4718d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 4728d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[out] pu1_dst 4738d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * UWORD8 pointer to the destination 4748d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 4758d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] src_strd 4768d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * integer source stride 4778d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 4788d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] dst_strd 4798d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * integer destination stride 4808d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 4818d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] ngbr_avail 4828d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * availability of neighbouring pixels(Not used in this function) 4838d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 
4848d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @returns 4858d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 4868d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @remarks 4878d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * None 4888d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 4898d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *******************************************************************************/ 4908d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha Svoid ih264_intra_pred_luma_4x4_mode_vert_r_ssse3(UWORD8 *pu1_src, 4918d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S UWORD8 *pu1_dst, 4928d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S WORD32 src_strd, 4938d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S WORD32 dst_strd, 4948d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S WORD32 ngbr_avail) 4958d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S{ 4968d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S UWORD8 *pu1_left; 4978d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S WORD32 dst_strd2, dst_strd3; 4988d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 4998d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S __m128i val_16x8b, temp_16x8b; 5008d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S __m128i w11_a1_16x8b, w11_a2_16x8b; 5018d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S __m128i w121_a1_8x16b, w121_a2_8x16b, w121_sh_8x16b; 5028d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S __m128i row1_16x8b, row2_16x8b, row3_16x8b, row4_16x8b; 5038d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S __m128i zero_vector, const_2_8x16b; 504796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy WORD32 row1,row2,row3,row4; 5058d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 5068d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S UNUSED(src_strd); 5078d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S UNUSED(ngbr_avail); 5088d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 
5098d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S pu1_left = pu1_src + BLK_SIZE - 1; 5108d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 5118d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S val_16x8b = _mm_loadl_epi64((__m128i *)(pu1_left - 2)); 5128d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S zero_vector = _mm_setzero_si128(); 5138d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 5148d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S w121_a1_8x16b = _mm_unpacklo_epi8(val_16x8b, zero_vector); //l2 l1 l0 tl t0 t1 t2 t3 5158d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S w11_a1_16x8b = _mm_srli_si128(val_16x8b, 3); 5168d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S w121_a2_8x16b = _mm_srli_si128(w121_a1_8x16b, 2); //l1 l0 tl t0 t1 t2 t3 0 5178d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S w11_a2_16x8b = _mm_srli_si128(val_16x8b, 4); 5188d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 5198d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S w121_a1_8x16b = _mm_add_epi16(w121_a1_8x16b, w121_a2_8x16b); //l2+l1 l1+l0 l0+tl tl+t0 t0+t1 t1+t2 t2+t3 t3 5208d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S row1_16x8b = _mm_avg_epu8(w11_a1_16x8b, w11_a2_16x8b); 5218d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S w121_a2_8x16b = _mm_srli_si128(w121_a1_8x16b, 2); //l1+l0 l0+tl tl+t0 t0+t1 t1+t2 t2+t3 t3 0 5228d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 5238d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S const_2_8x16b = _mm_set1_epi16(2); 5248d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S w121_a1_8x16b = _mm_add_epi16(w121_a1_8x16b, w121_a2_8x16b); //l2+2*l1+l0 l1+2*l0+tl ... 
5258d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S w121_a1_8x16b = _mm_add_epi16(w121_a1_8x16b, const_2_8x16b); 5268d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S w121_a1_8x16b = _mm_srai_epi16(w121_a1_8x16b, 2); 5278d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 5288d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S w121_sh_8x16b = _mm_shufflelo_epi16(w121_a1_8x16b, 0xe1); 5298d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S w121_sh_8x16b = _mm_srli_si128(w121_sh_8x16b, 2); 5308d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 5318d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S row4_16x8b = _mm_packus_epi16(w121_sh_8x16b, w121_sh_8x16b); 5328d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S temp_16x8b = _mm_slli_si128(w121_a1_8x16b, 13); 5338d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S row2_16x8b = _mm_srli_si128(row4_16x8b, 1); 5348d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S row3_16x8b = _mm_alignr_epi8(row1_16x8b, temp_16x8b, 15); 5358d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 5368d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S dst_strd2 = dst_strd << 1; 5378d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S dst_strd3 = dst_strd + dst_strd2; 5388d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 539796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy row1 = _mm_cvtsi128_si32(row1_16x8b); 540796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy row2 = _mm_cvtsi128_si32(row2_16x8b); 541796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy row3 = _mm_cvtsi128_si32(row3_16x8b); 542796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy row4 = _mm_cvtsi128_si32(row4_16x8b); 543796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy 544796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy *((WORD32 *)(pu1_dst)) = row1; 545796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy *((WORD32 *)(pu1_dst + dst_strd)) = row2; 
546796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy *((WORD32 *)(pu1_dst + dst_strd2)) = row3; 547796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy *((WORD32 *)(pu1_dst + dst_strd3)) = row4; 5488d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S} 5498d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 5508d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S/* 5518d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S ******************************************************************************* 5528d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 5538d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * ih264_intra_pred_luma_4x4_mode_horz_d_ssse3 5548d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 5558d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @brief 5568d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * Perform Intra prediction for luma_4x4 mode:Horizontal_Down 5578d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 5588d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @par Description: 5598d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * Perform Intra prediction for luma_4x4 mode:Horizontal_Down ,described in sec 8.3.1.2.7 5608d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 5618d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] pu1_src 5628d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * UWORD8 pointer to the source 5638d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 5648d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[out] pu1_dst 5658d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * UWORD8 pointer to the destination 5668d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 5678d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] src_strd 5688d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * integer source stride 5698d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 5708d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] 
dst_strd 5718d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * integer destination stride 5728d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 5738d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] ngbr_avail 5748d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * availability of neighbouring pixels(Not used in this function) 5758d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 5768d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @returns 5778d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 5788d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @remarks 5798d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * None 5808d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 5818d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *******************************************************************************/ 5828d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha Svoid ih264_intra_pred_luma_4x4_mode_horz_d_ssse3(UWORD8 *pu1_src, 5838d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S UWORD8 *pu1_dst, 5848d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S WORD32 src_strd, 5858d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S WORD32 dst_strd, 5868d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S WORD32 ngbr_avail) 5878d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S{ 5888d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S UWORD8 *pu1_left; 5898d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S WORD32 dst_strd2, dst_strd3; 5908d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S WORD32 val_121_t0t1; 5918d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 5928d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S __m128i val_16x8b, val_sh_16x8b; 5938d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S __m128i w11_16x8b; 5948d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S __m128i w121_a1_8x16b, w121_a2_8x16b, w121_16x8b; 5958d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S __m128i row1_16x8b, row2_16x8b, row3_16x8b, 
row4_16x8b; 5968d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 5978d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S __m128i zero_vector, const_2_8x16b; 598796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy WORD32 row1,row2,row3,row4; 5998d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 6008d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S UNUSED(src_strd); 6018d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S UNUSED(ngbr_avail); 6028d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 6038d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S pu1_left = pu1_src + BLK_SIZE - 1; 6048d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 6058d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S val_16x8b = _mm_loadl_epi64((__m128i *)(pu1_left - 3)); 6068d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S zero_vector = _mm_setzero_si128(); 6078d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S val_sh_16x8b = _mm_srli_si128(val_16x8b, 1); 6088d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S w11_16x8b = _mm_avg_epu8(val_16x8b, val_sh_16x8b); 6098d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 6108d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S w121_a1_8x16b = _mm_unpacklo_epi8(val_16x8b, zero_vector); //l3 l2 l1 l0 tl t0 t1 t2 6118d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S w121_a2_8x16b = _mm_srli_si128(w121_a1_8x16b, 2); //l2 l1 l0 tl t0 t1 t2 0 6128d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S w121_a1_8x16b = _mm_add_epi16(w121_a1_8x16b, w121_a2_8x16b); //l3+l2 l2+l1 l1+l0 l0+tl tl+t0 t0+t1 t1+t2 t2 6138d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S w121_a2_8x16b = _mm_srli_si128(w121_a1_8x16b, 2); //l2+l1 l1+l0 l0+tl tl+t0 t0+t1 t1+t2 t2 0 6148d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 6158d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S zero_vector = _mm_setzero_si128(); 6168d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S const_2_8x16b = _mm_set1_epi16(2); 
6178d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 6188d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S w121_a1_8x16b = _mm_add_epi16(w121_a1_8x16b, w121_a2_8x16b); //l3+2*l2+l1 l2+2*l1+l0 l1+2*l0+tl ... 6198d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S w121_a1_8x16b = _mm_add_epi16(w121_a1_8x16b, const_2_8x16b); 6208d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S w121_a1_8x16b = _mm_srai_epi16(w121_a1_8x16b, 2); 6218d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 6228d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S w121_16x8b = _mm_packus_epi16(w121_a1_8x16b, w121_a1_8x16b); 6238d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 6248d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S row4_16x8b = _mm_unpacklo_epi8(w11_16x8b, w121_16x8b); 6258d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S val_121_t0t1 = _mm_extract_epi16(w121_16x8b, 2); 6268d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S row4_16x8b = _mm_insert_epi16(row4_16x8b, val_121_t0t1, 4); 6278d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 6288d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S dst_strd2 = dst_strd << 1; 6298d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S dst_strd3 = dst_strd + dst_strd2; 6308d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 6318d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S row1_16x8b = _mm_srli_si128(row4_16x8b, 6); 6328d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S row2_16x8b = _mm_srli_si128(row4_16x8b, 4); 6338d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S row3_16x8b = _mm_srli_si128(row4_16x8b, 2); 6348d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 635796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy row1 = _mm_cvtsi128_si32(row1_16x8b); 636796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy row2 = _mm_cvtsi128_si32(row2_16x8b); 637796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy row3 = _mm_cvtsi128_si32(row3_16x8b); 638796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen 
Kumar Ponnusamy row4 = _mm_cvtsi128_si32(row4_16x8b); 639796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy 640796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy *((WORD32 *)(pu1_dst)) = row1; 641796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy *((WORD32 *)(pu1_dst + dst_strd)) = row2; 642796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy *((WORD32 *)(pu1_dst + dst_strd2)) = row3; 643796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy *((WORD32 *)(pu1_dst + dst_strd3)) = row4; 6448d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S} 6458d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 6468d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S/** 6478d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S ******************************************************************************* 6488d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 6498d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * ih264_intra_pred_luma_4x4_mode_vert_l_ssse3 6508d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 6518d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @brief 6528d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * Perform Intra prediction for luma_4x4 mode:Vertical_Left 6538d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 6548d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @par Description: 6558d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * Perform Intra prediction for luma_4x4 mode:Vertical_Left ,described in sec 8.3.1.2.8 6568d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 6578d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] pu1_src 6588d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * UWORD8 pointer to the source 6598d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 6608d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[out] pu1_dst 6618d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * UWORD8 pointer to the destination 
6628d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 6638d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] src_strd 6648d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * integer source stride 6658d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 6668d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] dst_strd 6678d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * integer destination stride 6688d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 6698d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] ngbr_avail 6708d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * availability of neighbouring pixels(Not used in this function) 6718d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 6728d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @returns 6738d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 6748d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @remarks 6758d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * None 6768d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 6778d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *******************************************************************************/ 6788d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha Svoid ih264_intra_pred_luma_4x4_mode_vert_l_ssse3(UWORD8 *pu1_src, 6798d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S UWORD8 *pu1_dst, 6808d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S WORD32 src_strd, 6818d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S WORD32 dst_strd, 6828d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S WORD32 ngbr_avail) 6838d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S{ 6848d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S UWORD8 *pu1_top; 6858d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S WORD32 dst_strd2, dst_strd3; 6868d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 6878d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S __m128i val_16x8b, val_sh_16x8b; 
6888d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S __m128i w121_a1_8x16b, w121_a2_8x16b; 6898d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S __m128i row1_16x8b, row2_16x8b, row3_16x8b, row4_16x8b; 6908d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 6918d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S __m128i zero_vector, const_2_8x16b; 692796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy WORD32 row1,row2,row3,row4; 6938d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 6948d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S UNUSED(src_strd); 6958d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S UNUSED(ngbr_avail); 6968d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 6978d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S pu1_top = pu1_src +BLK_SIZE + 1; 6988d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 6998d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S val_16x8b = _mm_loadl_epi64((__m128i *)pu1_top); 7008d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S zero_vector = _mm_setzero_si128(); 7018d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S val_sh_16x8b = _mm_srli_si128(val_16x8b, 1); 7028d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S row1_16x8b = _mm_avg_epu8(val_16x8b, val_sh_16x8b); 7038d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 7048d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S w121_a1_8x16b = _mm_unpacklo_epi8(val_16x8b, zero_vector); //t0 t1 t2 t3 t4 t5... 7058d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S w121_a2_8x16b = _mm_srli_si128(w121_a1_8x16b, 2); //t1 t2 t3 t4 t5 t6... 7068d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S w121_a1_8x16b = _mm_add_epi16(w121_a1_8x16b, w121_a2_8x16b); //t0+t1 t1+t2 t2+t3 t3+t4 t4+t5... 7078d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S w121_a2_8x16b = _mm_srli_si128(w121_a1_8x16b, 2); //t1+t2 t2+t3 t3+t4 t4+t5 t5+t6... 
7088d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 7098d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S zero_vector = _mm_setzero_si128(); 7108d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S const_2_8x16b = _mm_set1_epi16(2); 7118d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 7128d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S w121_a1_8x16b = _mm_add_epi16(w121_a1_8x16b, w121_a2_8x16b); //t0+2*t1+t2 t1+2*t2+t3 t2+2*t3+t4... 7138d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S w121_a1_8x16b = _mm_add_epi16(w121_a1_8x16b, const_2_8x16b); 7148d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S w121_a1_8x16b = _mm_srai_epi16(w121_a1_8x16b, 2); 7158d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 7168d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S row2_16x8b = _mm_packus_epi16(w121_a1_8x16b, w121_a1_8x16b); 7178d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 7188d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S dst_strd2 = dst_strd << 1; 7198d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S dst_strd3 = dst_strd + dst_strd2; 7208d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 7218d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S row3_16x8b = _mm_srli_si128(row1_16x8b, 1); 7228d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S row4_16x8b = _mm_srli_si128(row2_16x8b, 1); 7238d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 724796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy row1 = _mm_cvtsi128_si32(row1_16x8b); 725796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy row2 = _mm_cvtsi128_si32(row2_16x8b); 726796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy row3 = _mm_cvtsi128_si32(row3_16x8b); 727796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy row4 = _mm_cvtsi128_si32(row4_16x8b); 728796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy 729796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy *((WORD32 *)(pu1_dst)) = row1; 
730796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy *((WORD32 *)(pu1_dst + dst_strd)) = row2; 731796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy *((WORD32 *)(pu1_dst + dst_strd2)) = row3; 732796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy *((WORD32 *)(pu1_dst + dst_strd3)) = row4; 7338d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S} 7348d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 7358d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S/** 7368d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S ******************************************************************************* 7378d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 7388d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * ih264_intra_pred_luma_4x4_mode_horz_u_ssse3 7398d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 7408d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @brief 7418d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * Perform Intra prediction for luma_4x4 mode:Horizontal_Up 7428d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 7438d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @par Description: 7448d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * Perform Intra prediction for luma_4x4 mode:Horizontal_Up ,described in sec 8.3.1.2.9 7458d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 7468d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] pu1_src 7478d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * UWORD8 pointer to the source 7488d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 7498d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[out] pu1_dst 7508d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * UWORD8 pointer to the destination 7518d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 7528d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] src_strd 7538d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * integer source stride 
7548d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 7558d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] dst_strd 7568d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * integer destination stride 7578d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 7588d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] ngbr_avail 7598d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * availability of neighbouring pixels(Not used in this function) 7608d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 7618d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @returns 7628d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 7638d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @remarks 7648d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * None 7658d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 7668d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *******************************************************************************/ 7678d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha Svoid ih264_intra_pred_luma_4x4_mode_horz_u_ssse3(UWORD8 *pu1_src, 7688d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S UWORD8 *pu1_dst, 7698d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S WORD32 src_strd, 7708d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S WORD32 dst_strd, 7718d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S WORD32 ngbr_avail) 7728d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S{ 7738d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S UWORD8 *pu1_left; 7748d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S WORD32 dst_strd2, dst_strd3; 7758d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 7768d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S __m128i val_16x8b, val_sh_16x8b; 7778d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S __m128i w11_16x8b; 7788d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S __m128i w121_a1_8x16b, w121_a2_8x16b, w121_16x8b; 7798d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha 
S __m128i row1_16x8b, row2_16x8b, row3_16x8b, row4_16x8b; 7808d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 7818d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S __m128i zero_vector, const_2_8x16b, rev_16x8b; 782796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy WORD32 row1,row2,row3,row4; 7838d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 7848d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S UNUSED(src_strd); 7858d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S UNUSED(ngbr_avail); 7868d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 7878d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S pu1_left = pu1_src + BLK_SIZE - 1; 7888d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 7898d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S zero_vector = _mm_setzero_si128(); 7908d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S rev_16x8b = _mm_setr_epi8(3, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); 7918d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 7928d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S val_16x8b = _mm_loadl_epi64((__m128i *)(pu1_left - 3)); //l3 l2 l1 l0 0 0 0... 7938d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S val_16x8b = _mm_shuffle_epi8(val_16x8b, rev_16x8b); //l0 l1 l2 l3 l3 l3 l3... 7948d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 7958d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S val_sh_16x8b = _mm_srli_si128(val_16x8b, 1); 7968d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S w11_16x8b = _mm_avg_epu8(val_16x8b, val_sh_16x8b); 7978d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 7988d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S w121_a1_8x16b = _mm_unpacklo_epi8(val_16x8b, zero_vector); //l0 l1 l2 l3 l3 l3... 7998d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S w121_a2_8x16b = _mm_srli_si128(w121_a1_8x16b, 2); //l1 l2 l3 l3 l3 l3... 
8008d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 8018d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S w121_a1_8x16b = _mm_add_epi16(w121_a1_8x16b, w121_a2_8x16b); //l0+t1 l1+l2 l2+l3 2*l3 2*l3... 8028d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S w121_a2_8x16b = _mm_srli_si128(w121_a1_8x16b, 2); //l1+t2 l2+l3 2*l3 2*l3 2*l3... 8038d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 8048d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S zero_vector = _mm_setzero_si128(); 8058d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S const_2_8x16b = _mm_set1_epi16(2); 8068d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 8078d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S w121_a1_8x16b = _mm_add_epi16(w121_a1_8x16b, w121_a2_8x16b); //l0+2*l1+l2 l1+2*l2+l3 l2+3*l3 4*l3 4*l3... 8088d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S w121_a1_8x16b = _mm_add_epi16(w121_a1_8x16b, const_2_8x16b); 8098d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S w121_a1_8x16b = _mm_srai_epi16(w121_a1_8x16b, 2); 8108d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 8118d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S w121_16x8b = _mm_packus_epi16(w121_a1_8x16b, w121_a1_8x16b); 8128d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 8138d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S dst_strd2 = dst_strd << 1; 8148d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S dst_strd3 = dst_strd + dst_strd2; 8158d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 8168d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S row1_16x8b = _mm_unpacklo_epi8(w11_16x8b, w121_16x8b); 8178d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S row2_16x8b = _mm_srli_si128(row1_16x8b, 2); 8188d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S row3_16x8b = _mm_srli_si128(row1_16x8b, 4); 8198d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S row4_16x8b = _mm_srli_si128(row1_16x8b, 6); 8208d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 821796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen 
Kumar Ponnusamy row1 = _mm_cvtsi128_si32(row1_16x8b); 822796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy row2 = _mm_cvtsi128_si32(row2_16x8b); 823796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy row3 = _mm_cvtsi128_si32(row3_16x8b); 824796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy row4 = _mm_cvtsi128_si32(row4_16x8b); 825796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy 826796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy *((WORD32 *)(pu1_dst)) = row1; 827796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy *((WORD32 *)(pu1_dst + dst_strd)) = row2; 828796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy *((WORD32 *)(pu1_dst + dst_strd2)) = row3; 829796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy *((WORD32 *)(pu1_dst + dst_strd3)) = row4; 8308d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S} 8318d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 8328d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S/******************* 8x8 Modes *******************/ 8338d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 8348d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S/** 8358d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S ******************************************************************************* 8368d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 8378d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * ih264_intra_pred_luma_8x8_mode_vert_ssse3 8388d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 8398d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @brief 8408d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * Perform Intra prediction for luma_8x8 mode:vertical 8418d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 8428d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @par Description: 8438d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * Perform Intra prediction for luma_8x8 mode:vertical ,described in sec 8.3.2.2.2 
8448d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 8458d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] pu1_src 8468d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * UWORD8 pointer to the source 8478d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 8488d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[out] pu1_dst 8498d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * UWORD8 pointer to the destination 8508d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 8518d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] src_strd 8528d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * integer source stride 8538d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 8548d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] dst_strd 8558d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * integer destination stride 8568d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 8578d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] ngbr_avail 8588d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * availability of neighbouring pixels(Not used in this function) 8598d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 8608d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @returns 8618d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 8628d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @remarks 8638d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * None 8648d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 8658d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S ******************************************************************************* 8668d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S */ 8678d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha Svoid ih264_intra_pred_luma_8x8_mode_vert_ssse3(UWORD8 *pu1_src, 8688d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S UWORD8 *pu1_dst, 8698d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S WORD32 src_strd, 
8708d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S WORD32 dst_strd, 8718d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S WORD32 ngbr_avail) 8728d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S{ 8738d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S UWORD8 *pu1_top = NULL; 8748d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S __m128i top_8x8b; 8758d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S UNUSED(src_strd); 8768d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S UNUSED(ngbr_avail); 8778d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S pu1_top = pu1_src + BLK8x8SIZE + 1; 8788d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 8798d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S top_8x8b = _mm_loadl_epi64((__m128i *)pu1_top); 8808d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 8818d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storel_epi64((__m128i *)(pu1_dst + 0 * dst_strd), top_8x8b); 8828d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storel_epi64((__m128i *)(pu1_dst + 1 * dst_strd), top_8x8b); 8838d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storel_epi64((__m128i *)(pu1_dst + 2 * dst_strd), top_8x8b); 8848d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storel_epi64((__m128i *)(pu1_dst + 3 * dst_strd), top_8x8b); 8858d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storel_epi64((__m128i *)(pu1_dst + 4 * dst_strd), top_8x8b); 8868d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storel_epi64((__m128i *)(pu1_dst + 5 * dst_strd), top_8x8b); 8878d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storel_epi64((__m128i *)(pu1_dst + 6 * dst_strd), top_8x8b); 8888d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storel_epi64((__m128i *)(pu1_dst + 7 * dst_strd), top_8x8b); 8898d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S} 8908d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 8918d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S/** 
8928d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S ******************************************************************************* 8938d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 8948d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * ih264_intra_pred_luma_8x8_mode_horz_ssse3 8958d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 8968d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @brief 8978d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * Perform Intra prediction for luma_8x8 mode:horizontal 8988d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 8998d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @par Description: 9008d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * Perform Intra prediction for uma_8x8 mode:horizontal ,described in sec 8.3.2.2.2 9018d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 9028d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] pu1_src 9038d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * UWORD8 pointer to the source 9048d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 9058d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[out] pu1_dst 9068d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * UWORD8 pointer to the destination 9078d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 9088d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] src_strd 9098d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * integer source stride 9108d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 9118d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] dst_strd 9128d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * integer destination stride 9138d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 9148d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] ngbr_avail 9158d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * availability of neighbouring pixels(Not used in this function) 
9168d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 9178d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @returns 9188d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 9198d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @remarks 9208d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * None 9218d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 9228d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S ******************************************************************************* 9238d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S */ 9248d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha Svoid ih264_intra_pred_luma_8x8_mode_horz_ssse3(UWORD8 *pu1_src, 9258d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S UWORD8 *pu1_dst, 9268d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S WORD32 src_strd, 9278d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S WORD32 dst_strd, 9288d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S WORD32 ngbr_avail) 9298d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S{ 9308d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S UWORD8 *pu1_left = pu1_src + BLK8x8SIZE - 1; 9318d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S __m128i row1_8x8b, row2_8x8b, row3_8x8b, row4_8x8b; 9328d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S __m128i row5_8x8b, row6_8x8b, row7_8x8b, row8_8x8b; 9338d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 9348d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S UNUSED(src_strd); 9358d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S UNUSED(ngbr_avail); 9368d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 9378d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S row1_8x8b = _mm_set1_epi8(pu1_left[0]); 9388d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S row2_8x8b = _mm_set1_epi8(pu1_left[-1]); 9398d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S row3_8x8b = _mm_set1_epi8(pu1_left[-2]); 9408d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S row4_8x8b = 
_mm_set1_epi8(pu1_left[-3]); 9418d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S row5_8x8b = _mm_set1_epi8(pu1_left[-4]); 9428d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S row6_8x8b = _mm_set1_epi8(pu1_left[-5]); 9438d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S row7_8x8b = _mm_set1_epi8(pu1_left[-6]); 9448d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S row8_8x8b = _mm_set1_epi8(pu1_left[-7]); 9458d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 9468d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storel_epi64((__m128i *)(pu1_dst + 0 * dst_strd), row1_8x8b); 9478d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storel_epi64((__m128i *)(pu1_dst + 1 * dst_strd), row2_8x8b); 9488d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storel_epi64((__m128i *)(pu1_dst + 2 * dst_strd), row3_8x8b); 9498d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storel_epi64((__m128i *)(pu1_dst + 3 * dst_strd), row4_8x8b); 9508d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storel_epi64((__m128i *)(pu1_dst + 4 * dst_strd), row5_8x8b); 9518d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storel_epi64((__m128i *)(pu1_dst + 5 * dst_strd), row6_8x8b); 9528d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storel_epi64((__m128i *)(pu1_dst + 6 * dst_strd), row7_8x8b); 9538d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storel_epi64((__m128i *)(pu1_dst + 7 * dst_strd), row8_8x8b); 9548d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S} 9558d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 9568d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S/** 9578d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S ******************************************************************************* 9588d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 9598d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * ih264_intra_pred_luma_8x8_mode_dc_ssse3 9608d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 
9618d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @brief 9628d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * Perform Intra prediction for luma_8x8 mode:DC 9638d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 9648d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @par Description: 9658d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * Perform Intra prediction for luma_8x8 mode:DC ,described in sec 8.3.2.2.4 9668d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 9678d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] pu1_src 9688d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * UWORD8 pointer to the source 9698d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 9708d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[out] pu1_dst 9718d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * UWORD8 pointer to the destination 9728d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 9738d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] src_strd 9748d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * integer source stride 9758d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 9768d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] dst_strd 9778d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * integer destination stride 9788d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 9798d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] ngbr_avail 9808d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * availability of neighbouring pixels 9818d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 9828d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @returns 9838d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 9848d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @remarks 9858d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * None 9868d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 9878d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 
*******************************************************************************/ 9888d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha Svoid ih264_intra_pred_luma_8x8_mode_dc_ssse3(UWORD8 *pu1_src, 9898d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S UWORD8 *pu1_dst, 9908d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S WORD32 src_strd, 9918d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S WORD32 dst_strd, 9928d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S WORD32 ngbr_avail) 9938d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S{ 9948d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S UWORD8 u1_useleft; /* availability of left predictors (only for DC) */ 9958d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S UWORD8 u1_usetop; /* availability of top predictors (only for DC) */ 9968d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S UWORD8 *pu1_left = NULL; /* Pointer to start of left predictors */ 9978d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S UWORD8 *pu1_top = NULL; /* Pointer to start of top predictors */ 9988d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S __m128i dc_val_8x8b; 9998d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S WORD32 dc_val = 0; 10008d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S UNUSED(src_strd); 10018d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 10028d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S u1_useleft = BOOLEAN(ngbr_avail & LEFT_MB_AVAILABLE_MASK); 10038d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S u1_usetop = BOOLEAN(ngbr_avail & TOP_MB_AVAILABLE_MASK); 10048d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S pu1_top = pu1_src + BLK8x8SIZE + 1; 10058d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S pu1_left = pu1_src + BLK8x8SIZE - 1; 10068d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 10078d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S if(u1_useleft || u1_usetop) 10088d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S { 
10098d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S WORD32 shft = 2; 10108d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S __m128i val_8x8b, zero_8x8b, sum_8x16b; 10118d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 10128d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S zero_8x8b = _mm_setzero_si128(); 10138d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 10148d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S if(u1_useleft) 10158d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S { 10168d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S val_8x8b = _mm_loadl_epi64((__m128i *)(pu1_left - 7)); 10178d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S sum_8x16b = _mm_sad_epu8(zero_8x8b, val_8x8b); 10188d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 10198d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S shft++; 10208d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S dc_val += 4; 10218d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S dc_val += _mm_extract_epi16(sum_8x16b, 0); 10228d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S } 10238d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S if(u1_usetop) 10248d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S { 10258d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S val_8x8b = _mm_loadl_epi64((__m128i *)pu1_top); 10268d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S sum_8x16b = _mm_sad_epu8(zero_8x8b, val_8x8b); 10278d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 10288d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S shft++; 10298d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S dc_val += 4; 10308d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S dc_val += _mm_extract_epi16(sum_8x16b, 0); 10318d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S } 10328d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S dc_val = dc_val >> shft; 10338d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S } 10348d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S else 
10358d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S dc_val = 128; 10368d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 10378d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S dc_val_8x8b = _mm_set1_epi8(dc_val); 10388d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 10398d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storel_epi64((__m128i *)(pu1_dst + 0 * dst_strd), dc_val_8x8b); 10408d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storel_epi64((__m128i *)(pu1_dst + 1 * dst_strd), dc_val_8x8b); 10418d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storel_epi64((__m128i *)(pu1_dst + 2 * dst_strd), dc_val_8x8b); 10428d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storel_epi64((__m128i *)(pu1_dst + 3 * dst_strd), dc_val_8x8b); 10438d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storel_epi64((__m128i *)(pu1_dst + 4 * dst_strd), dc_val_8x8b); 10448d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storel_epi64((__m128i *)(pu1_dst + 5 * dst_strd), dc_val_8x8b); 10458d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storel_epi64((__m128i *)(pu1_dst + 6 * dst_strd), dc_val_8x8b); 10468d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storel_epi64((__m128i *)(pu1_dst + 7 * dst_strd), dc_val_8x8b); 10478d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S} 10488d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 10498d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S/** 10508d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S ******************************************************************************* 10518d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 10528d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * ih264_intra_pred_luma_8x8_mode_diag_dl_ssse3 10538d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 10548d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @brief 10558d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * Perform Intra prediction for luma_8x8 mode:Diagonal_Down_Left 
10568d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 10578d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @par Description: 10588d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * Perform Intra prediction for luma_8x8 mode:Diagonal_Down_Left ,described in sec 8.3.2.2.5 10598d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 10608d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] pu1_src 10618d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * UWORD8 pointer to the source 10628d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 10638d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[out] pu1_dst 10648d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * UWORD8 pointer to the destination 10658d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 10668d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] src_strd 10678d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * integer source stride 10688d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 10698d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] dst_strd 10708d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * integer destination stride 10718d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 10728d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] ngbr_avail 10738d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * availability of neighbouring pixels(Not used in this function) 10748d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 10758d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @returns 10768d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 10778d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @remarks 10788d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * None 10798d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 10808d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *******************************************************************************/ 
10818d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha Svoid ih264_intra_pred_luma_8x8_mode_diag_dl_ssse3(UWORD8 *pu1_src, 10828d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S UWORD8 *pu1_dst, 10838d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S WORD32 src_strd, 10848d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S WORD32 dst_strd, 10858d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S WORD32 ngbr_avail) 10868d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S{ 10878d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S UWORD8 *pu1_top = NULL; /* Pointer to start of top predictors */ 10888d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S __m128i top_16x8; 10898d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S __m128i out_15x16; 10908d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S __m128i a0_8x16, a1_8x16, a2_8x16; 10918d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S __m128i temp1, temp2; 10928d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S __m128i res1_8x16, res2_8x16; 10938d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S __m128i zero = _mm_setzero_si128(); 10948d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S __m128i const_val2_8x16 = _mm_set1_epi16(2); 10958d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 10968d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S UNUSED(src_strd); 10978d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S UNUSED(ngbr_avail); 10988d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 10998d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S pu1_top = pu1_src + BLK8x8SIZE + 1; 11008d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 11018d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S top_16x8 = _mm_loadu_si128((__m128i *)(pu1_top)); 11028d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 11038d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S temp1 = _mm_srli_si128(top_16x8, 1); 11048d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S temp2 = _mm_srli_si128(top_16x8, 2); 
11058d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S a0_8x16 = _mm_unpacklo_epi8(top_16x8, zero); 11068d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S a1_8x16 = _mm_unpacklo_epi8(temp1, zero); 11078d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S a2_8x16 = _mm_unpacklo_epi8(temp2, zero); 11088d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 11098d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S a0_8x16 = _mm_add_epi16(a0_8x16, a2_8x16); 11108d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S a1_8x16 = _mm_add_epi16(a1_8x16, a1_8x16); 11118d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S a0_8x16 = _mm_add_epi16(a0_8x16, const_val2_8x16); 11128d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S a0_8x16 = _mm_add_epi16(a0_8x16, a1_8x16); 11138d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res1_8x16 = _mm_srai_epi16(a0_8x16, 2); 11148d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 11158d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S temp2 = _mm_srli_si128(top_16x8, 2); 11168d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S temp1 = _mm_srli_si128(top_16x8, 1); 11178d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S a2_8x16 = _mm_unpackhi_epi8(temp2, zero); 11188d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S a0_8x16 = _mm_unpackhi_epi8(top_16x8, zero); 11198d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S a2_8x16 = _mm_shufflehi_epi16(a2_8x16, 0x14); 11208d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S a1_8x16 = _mm_unpackhi_epi8(temp1, zero); 11218d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 11228d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S a0_8x16 = _mm_add_epi16(a0_8x16, a2_8x16); 11238d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S a1_8x16 = _mm_add_epi16(a1_8x16, a1_8x16); 11248d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S a0_8x16 = _mm_add_epi16(a0_8x16, const_val2_8x16); 11258d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S a0_8x16 = _mm_add_epi16(a0_8x16, a1_8x16); 
11268d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res2_8x16 = _mm_srai_epi16(a0_8x16, 2); 11278d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 11288d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S out_15x16 = _mm_packus_epi16(res1_8x16, res2_8x16); 11298d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 11308d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storel_epi64((__m128i *)(pu1_dst + 0 * dst_strd), out_15x16); 11318d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S out_15x16 = _mm_srli_si128(out_15x16, 1); 11328d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storel_epi64((__m128i *)(pu1_dst + 1 * dst_strd), out_15x16); 11338d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S out_15x16 = _mm_srli_si128(out_15x16, 1); 11348d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storel_epi64((__m128i *)(pu1_dst + 2 * dst_strd), out_15x16); 11358d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S out_15x16 = _mm_srli_si128(out_15x16, 1); 11368d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storel_epi64((__m128i *)(pu1_dst + 3 * dst_strd), out_15x16); 11378d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S out_15x16 = _mm_srli_si128(out_15x16, 1); 11388d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storel_epi64((__m128i *)(pu1_dst + 4 * dst_strd), out_15x16); 11398d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S out_15x16 = _mm_srli_si128(out_15x16, 1); 11408d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storel_epi64((__m128i *)(pu1_dst + 5 * dst_strd), out_15x16); 11418d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S out_15x16 = _mm_srli_si128(out_15x16, 1); 11428d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storel_epi64((__m128i *)(pu1_dst + 6 * dst_strd), out_15x16); 11438d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S out_15x16 = _mm_srli_si128(out_15x16, 1); 11448d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storel_epi64((__m128i *)(pu1_dst + 7 * dst_strd), out_15x16); 
11458d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S} 11468d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 11478d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S/** 11488d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S ******************************************************************************* 11498d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 11508d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * ih264_intra_pred_luma_8x8_mode_diag_dr_ssse3 11518d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 11528d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @brief 11538d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * Perform Intra prediction for luma_8x8 mode:Diagonal_Down_Right 11548d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 11558d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @par Description: 11568d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * Perform Intra prediction for luma_8x8 mode:Diagonal_Down_Right ,described in sec 8.3.2.2.6 11578d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 11588d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] pu1_src 11598d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * UWORD8 pointer to the source 11608d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 11618d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[out] pu1_dst 11628d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * UWORD8 pointer to the destination 11638d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 11648d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] src_strd 11658d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * integer source stride 11668d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 11678d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] dst_strd 11688d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * integer destination stride 11698d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 
11708d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] ngbr_avail 11718d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * availability of neighbouring pixels(Not used in this function) 11728d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 11738d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @returns 11748d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 11758d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @remarks 11768d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * None 11778d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 11788d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *******************************************************************************/ 11798d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha Svoid ih264_intra_pred_luma_8x8_mode_diag_dr_ssse3(UWORD8 *pu1_src, 11808d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S UWORD8 *pu1_dst, 11818d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S WORD32 src_strd, 11828d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S WORD32 dst_strd, 11838d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S WORD32 ngbr_avail) 11848d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S{ 11858d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S UWORD8 *pu1_left = NULL; /* Pointer to start of left predictors */ 11868d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S UWORD8 *pu1_top = NULL; /* Pointer to start of top predictors */ 11878d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S __m128i top_8x8, left_16x8; 11888d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S __m128i out_15x16; 11898d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S __m128i a0_8x16, a1_8x16, a2_8x16; 11908d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S __m128i temp1, temp2; 11918d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S __m128i res1_8x16, res2_8x16; 11928d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S __m128i zero = _mm_setzero_si128(); 
11938d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S __m128i const_val2_8x16 = _mm_set1_epi16(2); 11948d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S __m128i str_8x8; 11958d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 11968d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S UNUSED(src_strd); 11978d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S UNUSED(ngbr_avail); 11988d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 11998d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S pu1_left = pu1_src + BLK8x8SIZE - 1; 12008d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S pu1_top = pu1_src + BLK8x8SIZE + 1; 12018d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 12028d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S left_16x8 = _mm_loadu_si128((__m128i *)(pu1_left - 7)); 12038d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 12048d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S temp1 = _mm_srli_si128(left_16x8, 1); 12058d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S temp2 = _mm_srli_si128(left_16x8, 2); 12068d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S a0_8x16 = _mm_unpacklo_epi8(left_16x8, zero); 12078d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S a1_8x16 = _mm_unpacklo_epi8(temp1, zero); 12088d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S a2_8x16 = _mm_unpacklo_epi8(temp2, zero); 12098d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 12108d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S a0_8x16 = _mm_add_epi16(a0_8x16, a2_8x16); 12118d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S a1_8x16 = _mm_add_epi16(a1_8x16, a1_8x16); 12128d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S a0_8x16 = _mm_add_epi16(a0_8x16, const_val2_8x16); 12138d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S a0_8x16 = _mm_add_epi16(a0_8x16, a1_8x16); 12148d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res1_8x16 = _mm_srai_epi16(a0_8x16, 2); 12158d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 
12168d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S top_8x8 = _mm_loadu_si128((__m128i *)(pu1_top - 1)); 12178d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 12188d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S temp1 = _mm_srli_si128(top_8x8, 1); 12198d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S temp2 = _mm_srli_si128(top_8x8, 2); 12208d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S a0_8x16 = _mm_unpacklo_epi8(top_8x8, zero); 12218d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S a1_8x16 = _mm_unpacklo_epi8(temp1, zero); 12228d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S a2_8x16 = _mm_unpacklo_epi8(temp2, zero); 12238d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 12248d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S a0_8x16 = _mm_add_epi16(a0_8x16, a2_8x16); 12258d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S a1_8x16 = _mm_add_epi16(a1_8x16, a1_8x16); 12268d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S a0_8x16 = _mm_add_epi16(a0_8x16, const_val2_8x16); 12278d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S a0_8x16 = _mm_add_epi16(a0_8x16, a1_8x16); 12288d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res2_8x16 = _mm_srai_epi16(a0_8x16, 2); 12298d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 12308d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S out_15x16 = _mm_packus_epi16(res1_8x16, res2_8x16); 12318d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 12328d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S str_8x8 = _mm_srli_si128(out_15x16, 7); 12338d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storel_epi64((__m128i *)(pu1_dst + 0 * dst_strd), str_8x8); 12348d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S str_8x8 = _mm_srli_si128(out_15x16, 6); 12358d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storel_epi64((__m128i *)(pu1_dst + 1 * dst_strd), str_8x8); 12368d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S str_8x8 = _mm_srli_si128(out_15x16, 5); 
12378d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storel_epi64((__m128i *)(pu1_dst + 2 * dst_strd), str_8x8); 12388d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S str_8x8 = _mm_srli_si128(out_15x16, 4); 12398d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storel_epi64((__m128i *)(pu1_dst + 3 * dst_strd), str_8x8); 12408d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S str_8x8 = _mm_srli_si128(out_15x16, 3); 12418d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storel_epi64((__m128i *)(pu1_dst + 4 * dst_strd), str_8x8); 12428d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S str_8x8 = _mm_srli_si128(out_15x16, 2); 12438d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storel_epi64((__m128i *)(pu1_dst + 5 * dst_strd), str_8x8); 12448d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S str_8x8 = _mm_srli_si128(out_15x16, 1); 12458d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storel_epi64((__m128i *)(pu1_dst + 6 * dst_strd), str_8x8); 12468d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storel_epi64((__m128i *)(pu1_dst + 7 * dst_strd), out_15x16); 12478d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S} 12488d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 12498d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S/** 12508d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S ******************************************************************************* 12518d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 12528d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * ih264_intra_pred_luma_8x8_mode_vert_r_ssse3 12538d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 12548d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @brief 12558d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * Perform Intra prediction for luma_8x8 mode:Vertical_Right 12568d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 12578d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @par Description: 
12588d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * Perform Intra prediction for luma_8x8 mode:Vertical_Right ,described in sec 8.3.2.2.7 12598d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 12608d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] pu1_src 12618d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * UWORD8 pointer to the source 12628d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 12638d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[out] pu1_dst 12648d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * UWORD8 pointer to the destination 12658d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 12668d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] src_strd 12678d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * integer source stride 12688d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 12698d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] dst_strd 12708d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * integer destination stride 12718d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 12728d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] ngbr_avail 12738d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * availability of neighbouring pixels(Not used in this function) 12748d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 12758d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @returns 12768d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 12778d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @remarks 12788d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * None 12798d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 12808d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *******************************************************************************/ 12818d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha Svoid ih264_intra_pred_luma_8x8_mode_vert_r_ssse3(UWORD8 *pu1_src, 12828d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 
UWORD8 *pu1_dst, 12838d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S WORD32 src_strd, 12848d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S WORD32 dst_strd, 12858d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S WORD32 ngbr_avail) 12868d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S{ 12878d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S UWORD8 *pu1_left = NULL; /* Pointer to start of left predictors */ 12888d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S UWORD8 *pu1_top = NULL; /* Pointer to start of top predictors */ 12898d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S __m128i top_8x8, left_16x8; 12908d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S __m128i out1_16x16, out2_16x16; 12918d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S __m128i a0_8x16, a1_8x16, a2_8x16; 12928d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S __m128i temp1, temp2; 12938d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S __m128i res1_8x16, res2_8x16, res3_8x16; 12948d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S __m128i zero = _mm_setzero_si128(); 12958d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S __m128i const_val2_8x16 = _mm_set1_epi16(2); 12968d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S __m128i str_8x8; 12978d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S __m128i mask = _mm_set1_epi32(0xFFFF); 12988d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 12998d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S UNUSED(src_strd); 13008d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S UNUSED(ngbr_avail); 13018d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 13028d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S pu1_left = pu1_src + BLK8x8SIZE - 1; 13038d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S pu1_top = pu1_src + BLK8x8SIZE + 1; 13048d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 13058d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S left_16x8 = _mm_loadu_si128((__m128i *)(pu1_left - 6)); 
13068d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 13078d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S temp1 = _mm_srli_si128(left_16x8, 1); 13088d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S temp2 = _mm_srli_si128(left_16x8, 2); 13098d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S a0_8x16 = _mm_unpacklo_epi8(left_16x8, zero); 13108d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S a1_8x16 = _mm_unpacklo_epi8(temp1, zero); 13118d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S a2_8x16 = _mm_unpacklo_epi8(temp2, zero); 13128d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 13138d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S a0_8x16 = _mm_add_epi16(a0_8x16, a2_8x16); 13148d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S a1_8x16 = _mm_add_epi16(a1_8x16, a1_8x16); 13158d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S a0_8x16 = _mm_add_epi16(a0_8x16, const_val2_8x16); 13168d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S a0_8x16 = _mm_add_epi16(a0_8x16, a1_8x16); 13178d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res1_8x16 = _mm_srai_epi16(a0_8x16, 2); 13188d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 13198d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S top_8x8 = _mm_loadu_si128((__m128i *)(pu1_top - 1)); 13208d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 13218d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S temp1 = _mm_srli_si128(top_8x8, 1); 13228d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S temp2 = _mm_srli_si128(top_8x8, 2); 13238d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S a0_8x16 = _mm_unpacklo_epi8(top_8x8, zero); 13248d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S a1_8x16 = _mm_unpacklo_epi8(temp1, zero); 13258d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S a2_8x16 = _mm_unpacklo_epi8(temp2, zero); 13268d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 13278d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res3_8x16 = _mm_avg_epu16(a0_8x16, a1_8x16); 
13288d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 13298d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S a0_8x16 = _mm_add_epi16(a0_8x16, a2_8x16); 13308d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S a1_8x16 = _mm_add_epi16(a1_8x16, a1_8x16); 13318d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S a0_8x16 = _mm_add_epi16(a0_8x16, const_val2_8x16); 13328d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S a0_8x16 = _mm_add_epi16(a0_8x16, a1_8x16); 13338d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res2_8x16 = _mm_srai_epi16(a0_8x16, 2); 13348d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 13358d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S str_8x8 = _mm_packus_epi16(res3_8x16, zero); 13368d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storel_epi64((__m128i *)(pu1_dst + 0 * dst_strd), str_8x8); 13378d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 13388d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S temp1 = _mm_and_si128(res1_8x16, mask); 13398d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S temp1 = _mm_packs_epi32(temp1, temp1); 13408d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S out1_16x16 = _mm_packus_epi16(temp1, res2_8x16); 13418d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 13428d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res1_8x16 = _mm_slli_si128(res1_8x16, 2); 13438d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S temp1 = _mm_and_si128(res1_8x16, mask); 13448d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S temp1 = _mm_packs_epi32(temp1, temp1); 13458d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S out2_16x16 = _mm_packus_epi16(temp1, res3_8x16); 13468d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 13478d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S str_8x8 = _mm_srli_si128(out1_16x16, 7); 13488d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storel_epi64((__m128i *)(pu1_dst + 1 * dst_strd), str_8x8); 13498d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 
13508d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S str_8x8 = _mm_srli_si128(out2_16x16, 7); 13518d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storel_epi64((__m128i *)(pu1_dst + 2 * dst_strd), str_8x8); 13528d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 13538d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S str_8x8 = _mm_srli_si128(out1_16x16, 6); 13548d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storel_epi64((__m128i *)(pu1_dst + 3 * dst_strd), str_8x8); 13558d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 13568d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S str_8x8 = _mm_srli_si128(out2_16x16, 6); 13578d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storel_epi64((__m128i *)(pu1_dst + 4 * dst_strd), str_8x8); 13588d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 13598d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S str_8x8 = _mm_srli_si128(out1_16x16, 5); 13608d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storel_epi64((__m128i *)(pu1_dst + 5 * dst_strd), str_8x8); 13618d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 13628d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S str_8x8 = _mm_srli_si128(out2_16x16, 5); 13638d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storel_epi64((__m128i *)(pu1_dst + 6 * dst_strd), str_8x8); 13648d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 13658d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S str_8x8 = _mm_srli_si128(out1_16x16, 4); 13668d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storel_epi64((__m128i *)(pu1_dst + 7 * dst_strd), str_8x8); 13678d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S} 13688d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 13698d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S/* 13708d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S ******************************************************************************* 13718d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 
13728d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * ih264_intra_pred_luma_8x8_mode_horz_d_ssse3 13738d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 13748d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @brief 13758d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * Perform Intra prediction for luma_8x8 mode:Horizontal_Down 13768d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 13778d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @par Description: 13788d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * Perform Intra prediction for luma_8x8 mode:Horizontal_Down ,described in sec 8.3.2.2.8 13798d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 13808d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] pu1_src 13818d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * UWORD8 pointer to the source 13828d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 13838d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[out] pu1_dst 13848d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * UWORD8 pointer to the destination 13858d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 13868d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] src_strd 13878d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * integer source stride 13888d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 13898d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] dst_strd 13908d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * integer destination stride 13918d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 13928d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] ngbr_avail 13938d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * availability of neighbouring pixels(Not used in this function) 13948d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 13958d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @returns 13968d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 
13978d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @remarks 13988d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * None 13998d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 14008d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *******************************************************************************/ 14018d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha Svoid ih264_intra_pred_luma_8x8_mode_horz_d_ssse3(UWORD8 *pu1_src, 14028d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S UWORD8 *pu1_dst, 14038d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S WORD32 src_strd, 14048d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S WORD32 dst_strd, 14058d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S WORD32 ngbr_avail) 14068d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S{ 14078d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S UWORD8 *pu1_left = NULL; /* Pointer to start of left predictors */ 14088d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S __m128i pels_16x16; 14098d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S __m128i temp1, temp2, temp3, temp4; 14108d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S __m128i a0_8x16, a1_8x16, a2_8x16; 14118d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S __m128i zero = _mm_setzero_si128(); 14128d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S __m128i const_val2_8x16 = _mm_set1_epi16(2); 14138d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S __m128i res1_8x16, res2_8x16; 14148d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S __m128i out1_16x16, out2_16x16; 14158d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S __m128i str_8x8; 14168d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S UNUSED(src_strd); 14178d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S UNUSED(ngbr_avail); 14188d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 14198d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S pu1_left = pu1_src + BLK8x8SIZE - 1; 14208d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 
14218d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S pels_16x16 = _mm_loadu_si128((__m128i *)(pu1_left - 7)); 14228d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 14238d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S temp1 = _mm_srli_si128(pels_16x16, 1); 14248d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S temp2 = _mm_srli_si128(pels_16x16, 2); 14258d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S a0_8x16 = _mm_unpacklo_epi8(pels_16x16, zero); 14268d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S a1_8x16 = _mm_unpacklo_epi8(temp1, zero); 14278d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S a2_8x16 = _mm_unpacklo_epi8(temp2, zero); 14288d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 14298d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res1_8x16 = _mm_avg_epu16(a0_8x16, a1_8x16); 14308d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 14318d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S a0_8x16 = _mm_add_epi16(a0_8x16, a2_8x16); 14328d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S a1_8x16 = _mm_add_epi16(a1_8x16, a1_8x16); 14338d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S a0_8x16 = _mm_add_epi16(a0_8x16, const_val2_8x16); 14348d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S a0_8x16 = _mm_add_epi16(a0_8x16, a1_8x16); 14358d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res2_8x16 = _mm_srai_epi16(a0_8x16, 2); 14368d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 14378d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S temp3 = _mm_unpacklo_epi16(res1_8x16, res2_8x16); 14388d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S temp4 = _mm_unpackhi_epi16(res1_8x16, res2_8x16); 14398d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S out2_16x16 = _mm_packus_epi16(temp3, temp4); 14408d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 14418d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S a0_8x16 = _mm_unpackhi_epi8(pels_16x16, zero); 14428d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S a1_8x16 = 
_mm_unpackhi_epi8(temp1, zero); 14438d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S a2_8x16 = _mm_unpackhi_epi8(temp2, zero); 14448d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 14458d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S a0_8x16 = _mm_add_epi16(a0_8x16, a2_8x16); 14468d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S a1_8x16 = _mm_add_epi16(a1_8x16, a1_8x16); 14478d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S a0_8x16 = _mm_add_epi16(a0_8x16, const_val2_8x16); 14488d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S a0_8x16 = _mm_add_epi16(a0_8x16, a1_8x16); 14498d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res2_8x16 = _mm_srai_epi16(a0_8x16, 2); 14508d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 14518d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S out1_16x16 = _mm_packus_epi16(res2_8x16, zero); 14528d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S temp1 = _mm_srli_si128(out2_16x16, 8); 14538d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S out1_16x16 = _mm_unpacklo_epi64(temp1, out1_16x16); 14548d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 14558d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S str_8x8 = _mm_srli_si128(out1_16x16, 6); 14568d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storel_epi64((__m128i *)(pu1_dst + 0 * dst_strd), str_8x8); 14578d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S str_8x8 = _mm_srli_si128(out1_16x16, 4); 14588d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storel_epi64((__m128i *)(pu1_dst + 1 * dst_strd), str_8x8); 14598d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S str_8x8 = _mm_srli_si128(out1_16x16, 2); 14608d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storel_epi64((__m128i *)(pu1_dst + 2 * dst_strd), str_8x8); 14618d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storel_epi64((__m128i *)(pu1_dst + 3 * dst_strd), out1_16x16); 14628d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 
14638d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S str_8x8 = _mm_srli_si128(out2_16x16, 6); 14648d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storel_epi64((__m128i *)(pu1_dst + 4 * dst_strd), str_8x8); 14658d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S str_8x8 = _mm_srli_si128(out2_16x16, 4); 14668d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storel_epi64((__m128i *)(pu1_dst + 5 * dst_strd), str_8x8); 14678d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S str_8x8 = _mm_srli_si128(out2_16x16, 2); 14688d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storel_epi64((__m128i *)(pu1_dst + 6 * dst_strd), str_8x8); 14698d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storel_epi64((__m128i *)(pu1_dst + 7 * dst_strd), out2_16x16); 14708d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S} 14718d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 14728d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S/** 14738d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S ******************************************************************************* 14748d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 14758d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * ih264_intra_pred_luma_8x8_mode_vert_l_ssse3 14768d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 14778d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @brief 14788d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * Perform Intra prediction for luma_8x8 mode:Vertical_Left 14798d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 14808d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @par Description: 14818d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * Perform Intra prediction for luma_8x8 mode:Vertical_Left ,described in sec 8.3.2.2.9 14828d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 14838d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] pu1_src 14848d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * UWORD8 pointer to the 
source 14858d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 14868d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[out] pu1_dst 14878d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * UWORD8 pointer to the destination 14888d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 14898d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] src_strd 14908d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * integer source stride 14918d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 14928d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] dst_strd 14938d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * integer destination stride 14948d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 14958d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] ngbr_avail 14968d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * availability of neighbouring pixels(Not used in this function) 14978d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 14988d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @returns 14998d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 15008d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @remarks 15018d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * None 15028d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 15038d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *******************************************************************************/ 15048d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 15058d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha Svoid ih264_intra_pred_luma_8x8_mode_vert_l_ssse3(UWORD8 *pu1_src, 15068d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S UWORD8 *pu1_dst, 15078d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S WORD32 src_strd, 15088d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S WORD32 dst_strd, 15098d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S WORD32 ngbr_avail) 15108d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S{ 
15118d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S UWORD8 *pu1_top = NULL; /* Pointer to start of top predictors */ 15128d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S __m128i top_16x16; 15138d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S __m128i temp1, temp2; 15148d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S __m128i a0_8x16, a1_8x16, a2_8x16; 15158d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S __m128i zero = _mm_setzero_si128(); 15168d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S __m128i const_val2_8x16 = _mm_set1_epi16(2); 15178d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S __m128i res1_8x16, res2_8x16, res3_8x16, res4_8x16; 15188d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S __m128i out1_16x16, out2_16x16; 15198d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S UNUSED(src_strd); 15208d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S UNUSED(ngbr_avail); 15218d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S pu1_top = pu1_src + BLK8x8SIZE + 1; 15228d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 15238d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S top_16x16 = _mm_loadu_si128((__m128i *)(pu1_top)); 15248d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S temp1 = _mm_srli_si128(top_16x16, 1); 15258d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S temp2 = _mm_srli_si128(top_16x16, 2); 15268d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S a0_8x16 = _mm_unpacklo_epi8(top_16x16, zero); 15278d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S a1_8x16 = _mm_unpacklo_epi8(temp1, zero); 15288d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S a2_8x16 = _mm_unpacklo_epi8(temp2, zero); 15298d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 15308d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res1_8x16 = _mm_avg_epu16(a0_8x16, a1_8x16); 15318d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 15328d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S a0_8x16 = _mm_add_epi16(a0_8x16, a2_8x16); 
15338d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S a1_8x16 = _mm_add_epi16(a1_8x16, a1_8x16); 15348d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S a0_8x16 = _mm_add_epi16(a0_8x16, const_val2_8x16); 15358d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S a0_8x16 = _mm_add_epi16(a0_8x16, a1_8x16); 15368d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res2_8x16 = _mm_srai_epi16(a0_8x16, 2); 15378d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 15388d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S a0_8x16 = _mm_unpackhi_epi8(top_16x16, zero); 15398d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S a1_8x16 = _mm_unpackhi_epi8(temp1, zero); 15408d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S a2_8x16 = _mm_unpackhi_epi8(temp2, zero); 15418d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 15428d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res3_8x16 = _mm_avg_epu16(a0_8x16, a1_8x16); 15438d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 15448d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S a0_8x16 = _mm_add_epi16(a0_8x16, a2_8x16); 15458d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S a1_8x16 = _mm_add_epi16(a1_8x16, a1_8x16); 15468d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S a0_8x16 = _mm_add_epi16(a0_8x16, const_val2_8x16); 15478d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S a0_8x16 = _mm_add_epi16(a0_8x16, a1_8x16); 15488d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res4_8x16 = _mm_srai_epi16(a0_8x16, 2); 15498d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 15508d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S out1_16x16 = _mm_packus_epi16(res1_8x16, res3_8x16); 15518d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S out2_16x16 = _mm_packus_epi16(res2_8x16, res4_8x16); 15528d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 15538d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storel_epi64((__m128i *)(pu1_dst + 0 * dst_strd), out1_16x16); 15548d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 
_mm_storel_epi64((__m128i *)(pu1_dst + 1 * dst_strd), out2_16x16); 15558d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S out1_16x16 = _mm_srli_si128(out1_16x16, 1); 15568d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S out2_16x16 = _mm_srli_si128(out2_16x16, 1); 15578d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storel_epi64((__m128i *)(pu1_dst + 2 * dst_strd), out1_16x16); 15588d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storel_epi64((__m128i *)(pu1_dst + 3 * dst_strd), out2_16x16); 15598d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S out1_16x16 = _mm_srli_si128(out1_16x16, 1); 15608d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S out2_16x16 = _mm_srli_si128(out2_16x16, 1); 15618d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storel_epi64((__m128i *)(pu1_dst + 4 * dst_strd), out1_16x16); 15628d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storel_epi64((__m128i *)(pu1_dst + 5 * dst_strd), out2_16x16); 15638d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S out1_16x16 = _mm_srli_si128(out1_16x16, 1); 15648d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S out2_16x16 = _mm_srli_si128(out2_16x16, 1); 15658d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storel_epi64((__m128i *)(pu1_dst + 6 * dst_strd), out1_16x16); 15668d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storel_epi64((__m128i *)(pu1_dst + 7 * dst_strd), out2_16x16); 15678d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S} 15688d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 15698d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S/** 15708d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S ******************************************************************************* 15718d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 15728d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * ih264_intra_pred_luma_8x8_mode_horz_u_ssse3 15738d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 
15748d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @brief 15758d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * Perform Intra prediction for luma_8x8 mode:Horizontal_Up 15768d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 15778d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @par Description: 15788d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * Perform Intra prediction for luma_8x8 mode:Horizontal_Up ,described in sec 8.3.2.2.10 15798d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 15808d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] pu1_src 15818d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * UWORD8 pointer to the source 15828d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 15838d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[out] pu1_dst 15848d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * UWORD8 pointer to the destination 15858d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 15868d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] src_strd 15878d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * integer source stride 15888d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 15898d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] dst_strd 15908d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * integer destination stride 15918d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 15928d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] ngbr_avail 15938d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * availability of neighbouring pixels(Not used in this function) 15948d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 15958d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @returns 15968d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 15978d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @remarks 15988d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * None 15998d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 
* 16008d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *******************************************************************************/ 16018d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha Svoid ih264_intra_pred_luma_8x8_mode_horz_u_ssse3(UWORD8 *pu1_src, 16028d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S UWORD8 *pu1_dst, 16038d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S WORD32 src_strd, 16048d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S WORD32 dst_strd, 16058d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S WORD32 ngbr_avail) 16068d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S{ 16078d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S UWORD8 *pu1_left = NULL; /* Pointer to start of left predictors */ 16088d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S __m128i left_16x16; 16098d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S __m128i temp1, temp2; 16108d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S __m128i a0_8x16, a1_8x16, a2_8x16; 16118d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S __m128i zero = _mm_setzero_si128(); 16128d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S __m128i const_val2_8x16 = _mm_set1_epi16(2); 16138d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S __m128i res1_8x16, res2_8x16; 16148d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S __m128i out1_16x16; 16158d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S __m128i str_8x8; 16168d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S __m128i shuffle_16x16; 16178d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S UNUSED(src_strd); 16188d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S UNUSED(ngbr_avail); 16198d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 16208d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S pu1_left = pu1_src + BLK8x8SIZE - 1; 16218d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S shuffle_16x16 = _mm_set_epi8(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 16228d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 
0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 16238d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 0x0F); 16248d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 16258d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S left_16x16 = _mm_loadu_si128((__m128i *)(pu1_left - 7)); 16268d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S temp1 = _mm_srli_si128(left_16x16, 1); 16278d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S a0_8x16 = _mm_unpacklo_epi8(left_16x16, zero); 16288d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S a0_8x16 = _mm_slli_si128(a0_8x16, 2); 16298d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S a1_8x16 = _mm_unpacklo_epi8(left_16x16, zero); 16308d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S a0_8x16 = _mm_shufflelo_epi16(a0_8x16, 0xE5); 16318d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S a2_8x16 = _mm_unpacklo_epi8(temp1, zero); 16328d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 16338d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res1_8x16 = _mm_avg_epu16(a0_8x16, a1_8x16); 16348d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 16358d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S a0_8x16 = _mm_add_epi16(a0_8x16, a2_8x16); 16368d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S a1_8x16 = _mm_add_epi16(a1_8x16, a1_8x16); 16378d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S a0_8x16 = _mm_add_epi16(a0_8x16, const_val2_8x16); 16388d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S a0_8x16 = _mm_add_epi16(a0_8x16, a1_8x16); 16398d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res2_8x16 = _mm_srai_epi16(a0_8x16, 2); 16408d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 16418d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S temp1 = _mm_unpacklo_epi16(res1_8x16, res2_8x16); 16428d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S temp2 = _mm_unpackhi_epi16(res1_8x16, res2_8x16); 16438d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S out1_16x16 = _mm_packus_epi16(temp1, temp2); 
16448d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S out1_16x16 = _mm_shuffle_epi8(out1_16x16, shuffle_16x16); 16458d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 16468d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S str_8x8 = _mm_srli_si128(out1_16x16, 1); 16478d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storel_epi64((__m128i *)(pu1_dst + 0 * dst_strd), str_8x8); 16488d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S str_8x8 = _mm_srli_si128(out1_16x16, 3); 16498d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storel_epi64((__m128i *)(pu1_dst + 1 * dst_strd), str_8x8); 16508d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S str_8x8 = _mm_srli_si128(out1_16x16, 5); 16518d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storel_epi64((__m128i *)(pu1_dst + 2 * dst_strd), str_8x8); 16528d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S str_8x8 = _mm_srli_si128(out1_16x16, 7); 16538d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storel_epi64((__m128i *)(pu1_dst + 3 * dst_strd), str_8x8); 16548d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S temp1 = _mm_set1_epi8(pu1_left[-7]); 16558d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S str_8x8 = _mm_unpacklo_epi64(str_8x8, temp1); 16568d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S str_8x8 = _mm_srli_si128(str_8x8, 2); 16578d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storel_epi64((__m128i *)(pu1_dst + 4 * dst_strd), str_8x8); 16588d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S str_8x8 = _mm_srli_si128(str_8x8, 2); 16598d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storel_epi64((__m128i *)(pu1_dst + 5 * dst_strd), str_8x8); 16608d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S str_8x8 = _mm_srli_si128(str_8x8, 2); 16618d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storel_epi64((__m128i *)(pu1_dst + 6 * dst_strd), str_8x8); 16628d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S str_8x8 = _mm_srli_si128(str_8x8, 2); 
16638d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storel_epi64((__m128i *)(pu1_dst + 7 * dst_strd), str_8x8); 16648d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 16658d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S} 16668d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 16678d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 16688d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S/******************* 16x16 Modes *******************/ 16698d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 16708d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S/** 16718d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S ******************************************************************************* 16728d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 16738d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *ih264_intra_pred_luma_16x16_mode_vert_ssse3 16748d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 16758d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @brief 16768d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * Perform Intra prediction for luma_16x16 mode:Vertical 16778d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 16788d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @par Description: 16798d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * Perform Intra prediction for luma_16x16 mode:Vertical, described in sec 8.3.3.1 16808d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 16818d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] pu1_src 16828d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * UWORD8 pointer to the source 16838d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 16848d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[out] pu1_dst 16858d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * UWORD8 pointer to the destination 16868d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 16878d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] src_strd 
16888d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * integer source stride 16898d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 16908d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] dst_strd 16918d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * integer destination stride 16928d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 16938d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] ngbr_avail 16948d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * availability of neighbouring pixels (Not used in this function) 16958d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 16968d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @returns 16978d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 16988d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @remarks 16998d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * None 17008d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 17018d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *******************************************************************************/ 17028d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha Svoid ih264_intra_pred_luma_16x16_mode_vert_ssse3(UWORD8 *pu1_src, 17038d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S UWORD8 *pu1_dst, 17048d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S WORD32 src_strd, 17058d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S WORD32 dst_strd, 17068d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S WORD32 ngbr_avail) 17078d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S{ 17088d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S UWORD8 *pu1_top; 17098d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S WORD32 dst_strd2, dst_strd3, dst_strd4; 17108d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 17118d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S __m128i top_16x8b; 17128d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 17138d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S UNUSED(src_strd); 
17148d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S UNUSED(ngbr_avail); 17158d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 17168d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S pu1_top = pu1_src + MB_SIZE + 1; 17178d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 17188d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S dst_strd2 = dst_strd << 1; 17198d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S dst_strd4 = dst_strd << 2; 17208d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 17218d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S top_16x8b = _mm_loadu_si128((__m128i *)pu1_top); 17228d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 17238d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S dst_strd3 = dst_strd + dst_strd2; 17248d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 17258d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storeu_si128((__m128i *)pu1_dst, top_16x8b); 17268d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), top_16x8b); 17278d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd2), top_16x8b); 17288d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd3), top_16x8b); 17298d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S pu1_dst += dst_strd4; 17308d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 17318d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storeu_si128((__m128i *)pu1_dst, top_16x8b); 17328d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), top_16x8b); 17338d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd2), top_16x8b); 17348d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd3), top_16x8b); 17358d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S pu1_dst += dst_strd4; 
17368d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 17378d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storeu_si128((__m128i *)pu1_dst, top_16x8b); 17388d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), top_16x8b); 17398d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd2), top_16x8b); 17408d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd3), top_16x8b); 17418d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S pu1_dst += dst_strd4; 17428d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 17438d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storeu_si128((__m128i *)pu1_dst, top_16x8b); 17448d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), top_16x8b); 17458d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd2), top_16x8b); 17468d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd3), top_16x8b); 17478d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S} 17488d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 17498d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S/** 17508d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S ******************************************************************************* 17518d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 17528d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *ih264_intra_pred_luma_16x16_mode_horz_ssse3 17538d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 17548d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @brief 17558d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * Perform Intra prediction for luma_16x16 mode:Horizontal 17568d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 17578d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @par Description: 
17588d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * Perform Intra prediction for luma_16x16 mode:Horizontal, described in sec 8.3.3.2 17598d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 17608d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] pu1_src 17618d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * UWORD8 pointer to the source 17628d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 17638d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[out] pu1_dst 17648d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * UWORD8 pointer to the destination 17658d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 17668d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] src_strd 17678d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * integer source stride 17688d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 17698d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] dst_strd 17708d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * integer destination stride 17718d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 17728d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] ngbr_avail 17738d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * availability of neighbouring pixels(Not used in this function) 17748d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 17758d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @returns 17768d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 17778d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @remarks 17788d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * None 17798d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 17808d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *******************************************************************************/ 17818d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha Svoid ih264_intra_pred_luma_16x16_mode_horz_ssse3(UWORD8 *pu1_src, 17828d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 
UWORD8 *pu1_dst, 17838d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S WORD32 src_strd, 17848d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S WORD32 dst_strd, 17858d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S WORD32 ngbr_avail) 17868d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S{ 17878d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S UWORD8 *pu1_left; 17888d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S WORD32 dst_strd2, dst_strd3, dst_strd4; 17898d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 17908d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S __m128i row1_16x8b, row2_16x8b, row3_16x8b, row4_16x8b; 17918d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 17928d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S UNUSED(src_strd); 17938d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S UNUSED(ngbr_avail); 17948d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 17958d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S pu1_left = pu1_src + MB_SIZE - 1; 17968d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 17978d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S dst_strd4 = dst_strd << 2; 17988d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 17998d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S dst_strd2 = dst_strd << 1; 18008d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S dst_strd3 = dst_strd4 - dst_strd; 18018d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 1802796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy row1_16x8b = _mm_set1_epi8(*(pu1_left)); 1803796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy row2_16x8b = _mm_set1_epi8(*(pu1_left - 1)); 1804796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy row3_16x8b = _mm_set1_epi8(*(pu1_left - 2)); 1805796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy row4_16x8b = _mm_set1_epi8(*(pu1_left - 3)); 18068d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 18078d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 
_mm_storeu_si128((__m128i *)pu1_dst, row1_16x8b); 18088d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), row2_16x8b); 18098d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd2), row3_16x8b); 18108d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd3), row4_16x8b); 18118d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 18128d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S pu1_dst += dst_strd4; 1813796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy row1_16x8b = _mm_set1_epi8(*(pu1_left - 4)); 1814796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy row2_16x8b = _mm_set1_epi8(*(pu1_left - 5)); 1815796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy row3_16x8b = _mm_set1_epi8(*(pu1_left - 6)); 1816796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy row4_16x8b = _mm_set1_epi8(*(pu1_left - 7)); 18178d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 18188d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storeu_si128((__m128i *)pu1_dst, row1_16x8b); 18198d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), row2_16x8b); 18208d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd2), row3_16x8b); 18218d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd3), row4_16x8b); 18228d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 18238d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S pu1_dst += dst_strd4; 1824796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy row1_16x8b = _mm_set1_epi8(*(pu1_left - 8)); 1825796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy row2_16x8b = _mm_set1_epi8(*(pu1_left - 9)); 1826796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy row3_16x8b = _mm_set1_epi8(*(pu1_left - 10)); 
1827796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy row4_16x8b = _mm_set1_epi8(*(pu1_left - 11)); 18288d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 18298d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storeu_si128((__m128i *)pu1_dst, row1_16x8b); 18308d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), row2_16x8b); 18318d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd2), row3_16x8b); 18328d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd3), row4_16x8b); 18338d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 18348d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S pu1_dst += dst_strd4; 1835796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy row1_16x8b = _mm_set1_epi8(*(pu1_left - 12)); 1836796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy row2_16x8b = _mm_set1_epi8(*(pu1_left - 13)); 1837796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy row3_16x8b = _mm_set1_epi8(*(pu1_left - 14)); 1838796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy row4_16x8b = _mm_set1_epi8(*(pu1_left - 15)); 18398d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 18408d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storeu_si128((__m128i *)pu1_dst, row1_16x8b); 18418d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), row2_16x8b); 18428d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd2), row3_16x8b); 18438d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd3), row4_16x8b); 18448d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S} 18458d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 18468d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S/** 18478d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 
 *******************************************************************************
 *
 * ih264_intra_pred_luma_16x16_mode_dc_ssse3
 *
 * @brief
 *  Perform Intra prediction for luma_16x16 mode:DC
 *
 * @par Description:
 *  Perform Intra prediction for luma_16x16 mode:DC, described in sec 8.3.3.3
 *
 * @param[in] pu1_src
 *  UWORD8 pointer to the source
 *
 * @param[out] pu1_dst
 *  UWORD8 pointer to the destination
 *
 * @param[in] src_strd
 *  integer source stride
 *
 * @param[in] dst_strd
 *  integer destination stride
 *
 * @param[in] ngbr_avail
 *  availability of neighbouring pixels
 *
 *
@returns 18738d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 18748d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @remarks 18758d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * None 18768d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 18778d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *******************************************************************************/ 18788d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha Svoid ih264_intra_pred_luma_16x16_mode_dc_ssse3(UWORD8 *pu1_src, 18798d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S UWORD8 *pu1_dst, 18808d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S WORD32 src_strd, 18818d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S WORD32 dst_strd, 18828d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S WORD32 ngbr_avail) 18838d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S{ 18848d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S WORD8 u1_useleft, u1_usetop; 18858d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S WORD32 dc_val; 18868d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 18878d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S WORD32 dst_strd2, dst_strd3, dst_strd4; 18888d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 18898d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S __m128i dc_val_16x8b; 18908d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 18918d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S UNUSED(src_strd); 18928d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 18938d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S u1_useleft = BOOLEAN(ngbr_avail & LEFT_MB_AVAILABLE_MASK); 18948d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S u1_usetop = BOOLEAN(ngbr_avail & TOP_MB_AVAILABLE_MASK); 18958d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 18968d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S if(u1_useleft || u1_usetop) 18978d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S { 18988d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 
WORD32 shft; 18998d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S __m128i val_16x8b, zero_16x8b, sum_8x16b; 19008d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 19018d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S dc_val = 0; 19028d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S shft = 3; 19038d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 19048d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S zero_16x8b = _mm_setzero_si128(); 19058d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 19068d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S if(u1_useleft) 19078d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S { 19088d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S UWORD8 *pu1_left; 19098d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 19108d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S pu1_left = pu1_src + MB_SIZE - 1; 19118d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 19128d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S val_16x8b = _mm_loadu_si128((__m128i *)(pu1_left - 15)); 19138d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S sum_8x16b = _mm_sad_epu8(zero_16x8b, val_16x8b); 19148d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 19158d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S shft++; 19168d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S dc_val += 8; 19178d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S dc_val += _mm_extract_epi16(sum_8x16b, 0); 19188d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S dc_val += _mm_extract_epi16(sum_8x16b, 4); 19198d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S } 19208d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S if(u1_usetop) 19218d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S { 19228d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S UWORD8 *pu1_top; 19238d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 19248d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S pu1_top = pu1_src + MB_SIZE + 1; 19258d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 
19268d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S val_16x8b = _mm_loadu_si128((__m128i *)pu1_top); 19278d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S sum_8x16b = _mm_sad_epu8(zero_16x8b, val_16x8b); 19288d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 19298d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S shft++; 19308d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S dc_val += 8; 19318d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S dc_val += _mm_extract_epi16(sum_8x16b, 0); 19328d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S dc_val += _mm_extract_epi16(sum_8x16b, 4); 19338d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S } 19348d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S dc_val = dc_val >> shft; 19358d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S } 19368d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S else 19378d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S dc_val = 128; 19388d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 19398d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S dc_val_16x8b = _mm_set1_epi8(dc_val); 19408d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 19418d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S dst_strd2 = dst_strd << 1; 19428d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S dst_strd4 = dst_strd << 2; 19438d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S dst_strd3 = dst_strd + dst_strd2; 19448d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 19458d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storeu_si128((__m128i *)pu1_dst, dc_val_16x8b); 19468d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), dc_val_16x8b); 19478d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd2), dc_val_16x8b); 19488d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd3), dc_val_16x8b); 19498d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 
pu1_dst += dst_strd4; 19508d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 19518d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storeu_si128((__m128i *)pu1_dst, dc_val_16x8b); 19528d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), dc_val_16x8b); 19538d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd2), dc_val_16x8b); 19548d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd3), dc_val_16x8b); 19558d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S pu1_dst += dst_strd4; 19568d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 19578d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storeu_si128((__m128i *)pu1_dst, dc_val_16x8b); 19588d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), dc_val_16x8b); 19598d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd2), dc_val_16x8b); 19608d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd3), dc_val_16x8b); 19618d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S pu1_dst += dst_strd4; 19628d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 19638d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storeu_si128((__m128i *)pu1_dst, dc_val_16x8b); 19648d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), dc_val_16x8b); 19658d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd2), dc_val_16x8b); 19668d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd3), dc_val_16x8b); 19678d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S} 19688d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 19698d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S/** 
19708d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S ******************************************************************************* 19718d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 19728d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *ih264_intra_pred_luma_16x16_mode_plane_ssse3 19738d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 19748d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @brief 19758d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * Perform Intra prediction for luma_16x16 mode:PLANE 19768d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 19778d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @par Description: 19788d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * Perform Intra prediction for luma_16x16 mode:PLANE, described in sec 8.3.3.4 19798d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 19808d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] pu1_src 19818d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * UWORD8 pointer to the source 19828d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 19838d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[out] pu1_dst 19848d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * UWORD8 pointer to the destination 19858d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 19868d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] src_strd 19878d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * integer source stride 19888d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 19898d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] dst_strd 19908d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * integer destination stride 19918d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 19928d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] ngbr_avail 19938d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * availability of neighbouring pixels(Not used in this function) 
19948d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 19958d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @returns 19968d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 19978d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @remarks 19988d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * None 19998d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * 20008d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *******************************************************************************/ 20018d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha Svoid ih264_intra_pred_luma_16x16_mode_plane_ssse3(UWORD8 *pu1_src, 20028d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S UWORD8 *pu1_dst, 20038d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S WORD32 src_strd, 20048d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S WORD32 dst_strd, 20058d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S WORD32 ngbr_avail) 20068d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S{ 20078d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S UWORD8 *pu1_left, *pu1_top; 20088d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S WORD32 a, b, c; 20098d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 20108d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S __m128i rev_8x16b, mul_8x16b, zero_16x8b; 20118d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 20128d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S UNUSED(src_strd); 20138d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S UNUSED(ngbr_avail); 20148d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 20158d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S pu1_top = pu1_src + MB_SIZE + 1; 20168d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S pu1_left = pu1_src + MB_SIZE - 1; 20178d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 20188d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S rev_8x16b = _mm_setr_epi16(0x0f0e, 0x0d0c, 0x0b0a, 0x0908, 0x0706, 0x0504, 0x0302, 0x0100); 
20198d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S //used to reverse the order of 16-bit values in a vector 20208d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 20218d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S mul_8x16b = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8); 20228d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S zero_16x8b = _mm_setzero_si128(); 20238d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 20248d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S //calculating a, b and c 20258d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S { 20268d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S WORD32 h, v; 20278d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 20288d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S __m128i h_val1_16x8b, h_val2_16x8b; 20298d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S __m128i h_val1_8x16b, h_val2_8x16b, h_val_4x32b; 20308d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S __m128i v_val1_16x8b, v_val2_16x8b; 20318d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S __m128i v_val1_8x16b, v_val2_8x16b, v_val_4x32b; 20328d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S __m128i hv_val_4x32b; 20338d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 20348d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S a = (pu1_top[15] + pu1_left[-15]) << 4; 20358d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 20368d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S h_val1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_top + 8)); 20378d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S h_val2_16x8b = _mm_loadl_epi64((__m128i *)(pu1_top - 1)); 20388d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S v_val1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_left - 15)); 20398d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S v_val2_16x8b = _mm_loadl_epi64((__m128i *)(pu1_left - 6)); 20408d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 20418d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S h_val1_8x16b = 
_mm_unpacklo_epi8(h_val1_16x8b, zero_16x8b); 20428d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S h_val2_8x16b = _mm_unpacklo_epi8(h_val2_16x8b, zero_16x8b); 20438d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S v_val1_8x16b = _mm_unpacklo_epi8(v_val1_16x8b, zero_16x8b); 20448d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S v_val2_8x16b = _mm_unpacklo_epi8(v_val2_16x8b, zero_16x8b); 20458d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 20468d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S h_val2_8x16b = _mm_shuffle_epi8(h_val2_8x16b, rev_8x16b); 20478d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S v_val1_8x16b = _mm_shuffle_epi8(v_val1_8x16b, rev_8x16b); 20488d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 20498d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S h_val1_8x16b = _mm_sub_epi16(h_val1_8x16b, h_val2_8x16b); 20508d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S v_val1_8x16b = _mm_sub_epi16(v_val1_8x16b, v_val2_8x16b); 20518d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 20528d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S h_val_4x32b = _mm_madd_epi16(mul_8x16b, h_val1_8x16b); 20538d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S v_val_4x32b = _mm_madd_epi16(mul_8x16b, v_val1_8x16b); 20548d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 20558d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S hv_val_4x32b = _mm_hadd_epi32(h_val_4x32b, v_val_4x32b); 20568d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S hv_val_4x32b = _mm_hadd_epi32(hv_val_4x32b, hv_val_4x32b); 20578d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 20588d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S h = _mm_extract_epi16(hv_val_4x32b, 0); 20598d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S v = _mm_extract_epi16(hv_val_4x32b, 2); 20608d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S h = (h << 16) >> 16; 20618d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S v = (v << 16) >> 16; 20628d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha 
S 20638d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S b = ((h << 2) + h + 32) >> 6; 20648d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S c = ((v << 2) + v + 32) >> 6; 20658d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S } 20668d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 20678d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S //using a, b and c to compute the fitted plane values 20688d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S { 20698d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S __m128i const_8x16b, b_8x16b, c_8x16b, c2_8x16b; 20708d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S __m128i res1_l_8x16b, res1_h_8x16b; 20718d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S __m128i res2_l_8x16b, res2_h_8x16b; 20728d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S __m128i res1_sh_l_8x16b, res1_sh_h_8x16b, res1_16x8b; 20738d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S __m128i res2_sh_l_8x16b, res2_sh_h_8x16b, res2_16x8b; 20748d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 20758d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S b_8x16b = _mm_set1_epi16(b); 20768d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S c_8x16b = _mm_set1_epi16(c); 20778d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S c2_8x16b = _mm_set1_epi16(c << 1); 20788d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S const_8x16b = _mm_set1_epi16(a - c*7 + 16); 20798d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 20808d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res1_h_8x16b = _mm_mullo_epi16(mul_8x16b, b_8x16b); 20818d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S //contains {b*1, b*2, b*3,... 
b*8} 20828d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 20838d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res1_l_8x16b = _mm_shuffle_epi8(res1_h_8x16b, rev_8x16b); 20848d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res1_l_8x16b = _mm_srli_si128(res1_l_8x16b, 2); 20858d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res1_l_8x16b = _mm_sub_epi16(zero_16x8b, res1_l_8x16b); 20868d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S //contains {-b*7, -b*6,... -b*1, b*0} 20878d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 20888d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S // rows 1, 2 20898d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res1_h_8x16b = _mm_add_epi16(res1_h_8x16b, const_8x16b); 20908d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res1_l_8x16b = _mm_add_epi16(res1_l_8x16b, const_8x16b); 20918d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res2_h_8x16b = _mm_add_epi16(res1_h_8x16b, c_8x16b); 20928d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res2_l_8x16b = _mm_add_epi16(res1_l_8x16b, c_8x16b); 20938d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 20948d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res1_sh_h_8x16b = _mm_srai_epi16(res1_h_8x16b, 5); 20958d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res1_sh_l_8x16b = _mm_srai_epi16(res1_l_8x16b, 5); 20968d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res2_sh_h_8x16b = _mm_srai_epi16(res2_h_8x16b, 5); 20978d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res2_sh_l_8x16b = _mm_srai_epi16(res2_l_8x16b, 5); 20988d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 20998d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res1_16x8b = _mm_packus_epi16(res1_sh_l_8x16b, res1_sh_h_8x16b); 21008d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res2_16x8b = _mm_packus_epi16(res2_sh_l_8x16b, res2_sh_h_8x16b); 21018d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 21028d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storeu_si128((__m128i 
*)pu1_dst, res1_16x8b); 21038d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), res2_16x8b); 21048d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 21058d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S // rows 3, 4 21068d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res1_h_8x16b = _mm_add_epi16(res1_h_8x16b, c2_8x16b); 21078d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res1_l_8x16b = _mm_add_epi16(res1_l_8x16b, c2_8x16b); 21088d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res2_h_8x16b = _mm_add_epi16(res2_h_8x16b, c2_8x16b); 21098d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res2_l_8x16b = _mm_add_epi16(res2_l_8x16b, c2_8x16b); 21108d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 21118d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res1_sh_h_8x16b = _mm_srai_epi16(res1_h_8x16b, 5); 21128d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res1_sh_l_8x16b = _mm_srai_epi16(res1_l_8x16b, 5); 21138d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res2_sh_h_8x16b = _mm_srai_epi16(res2_h_8x16b, 5); 21148d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res2_sh_l_8x16b = _mm_srai_epi16(res2_l_8x16b, 5); 21158d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 21168d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S pu1_dst += dst_strd << 1; 21178d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 21188d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res1_16x8b = _mm_packus_epi16(res1_sh_l_8x16b, res1_sh_h_8x16b); 21198d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res2_16x8b = _mm_packus_epi16(res2_sh_l_8x16b, res2_sh_h_8x16b); 21208d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 21218d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storeu_si128((__m128i *)pu1_dst, res1_16x8b); 21228d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), res2_16x8b); 21238d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 
21248d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S // rows 5, 6 21258d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res1_h_8x16b = _mm_add_epi16(res1_h_8x16b, c2_8x16b); 21268d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res1_l_8x16b = _mm_add_epi16(res1_l_8x16b, c2_8x16b); 21278d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res2_h_8x16b = _mm_add_epi16(res2_h_8x16b, c2_8x16b); 21288d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res2_l_8x16b = _mm_add_epi16(res2_l_8x16b, c2_8x16b); 21298d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 21308d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res1_sh_h_8x16b = _mm_srai_epi16(res1_h_8x16b, 5); 21318d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res1_sh_l_8x16b = _mm_srai_epi16(res1_l_8x16b, 5); 21328d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res2_sh_h_8x16b = _mm_srai_epi16(res2_h_8x16b, 5); 21338d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res2_sh_l_8x16b = _mm_srai_epi16(res2_l_8x16b, 5); 21348d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 21358d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S pu1_dst += dst_strd << 1; 21368d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 21378d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res1_16x8b = _mm_packus_epi16(res1_sh_l_8x16b, res1_sh_h_8x16b); 21388d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res2_16x8b = _mm_packus_epi16(res2_sh_l_8x16b, res2_sh_h_8x16b); 21398d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 21408d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storeu_si128((__m128i *)pu1_dst, res1_16x8b); 21418d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), res2_16x8b); 21428d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 21438d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S // rows 7, 8 21448d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res1_h_8x16b = _mm_add_epi16(res1_h_8x16b, c2_8x16b); 
21458d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res1_l_8x16b = _mm_add_epi16(res1_l_8x16b, c2_8x16b); 21468d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res2_h_8x16b = _mm_add_epi16(res2_h_8x16b, c2_8x16b); 21478d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res2_l_8x16b = _mm_add_epi16(res2_l_8x16b, c2_8x16b); 21488d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 21498d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res1_sh_h_8x16b = _mm_srai_epi16(res1_h_8x16b, 5); 21508d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res1_sh_l_8x16b = _mm_srai_epi16(res1_l_8x16b, 5); 21518d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res2_sh_h_8x16b = _mm_srai_epi16(res2_h_8x16b, 5); 21528d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res2_sh_l_8x16b = _mm_srai_epi16(res2_l_8x16b, 5); 21538d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 21548d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S pu1_dst += dst_strd << 1; 21558d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 21568d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res1_16x8b = _mm_packus_epi16(res1_sh_l_8x16b, res1_sh_h_8x16b); 21578d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res2_16x8b = _mm_packus_epi16(res2_sh_l_8x16b, res2_sh_h_8x16b); 21588d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 21598d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storeu_si128((__m128i *)pu1_dst, res1_16x8b); 21608d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), res2_16x8b); 21618d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 21628d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S // rows 9, 10 21638d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res1_h_8x16b = _mm_add_epi16(res1_h_8x16b, c2_8x16b); 21648d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res1_l_8x16b = _mm_add_epi16(res1_l_8x16b, c2_8x16b); 21658d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res2_h_8x16b = _mm_add_epi16(res2_h_8x16b, 
c2_8x16b); 21668d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res2_l_8x16b = _mm_add_epi16(res2_l_8x16b, c2_8x16b); 21678d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 21688d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res1_sh_h_8x16b = _mm_srai_epi16(res1_h_8x16b, 5); 21698d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res1_sh_l_8x16b = _mm_srai_epi16(res1_l_8x16b, 5); 21708d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res2_sh_h_8x16b = _mm_srai_epi16(res2_h_8x16b, 5); 21718d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res2_sh_l_8x16b = _mm_srai_epi16(res2_l_8x16b, 5); 21728d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 21738d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S pu1_dst += dst_strd << 1; 21748d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 21758d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res1_16x8b = _mm_packus_epi16(res1_sh_l_8x16b, res1_sh_h_8x16b); 21768d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res2_16x8b = _mm_packus_epi16(res2_sh_l_8x16b, res2_sh_h_8x16b); 21778d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 21788d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storeu_si128((__m128i *)pu1_dst, res1_16x8b); 21798d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), res2_16x8b); 21808d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 21818d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S // rows 11, 12 21828d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res1_h_8x16b = _mm_add_epi16(res1_h_8x16b, c2_8x16b); 21838d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res1_l_8x16b = _mm_add_epi16(res1_l_8x16b, c2_8x16b); 21848d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res2_h_8x16b = _mm_add_epi16(res2_h_8x16b, c2_8x16b); 21858d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res2_l_8x16b = _mm_add_epi16(res2_l_8x16b, c2_8x16b); 21868d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 
21878d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res1_sh_h_8x16b = _mm_srai_epi16(res1_h_8x16b, 5); 21888d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res1_sh_l_8x16b = _mm_srai_epi16(res1_l_8x16b, 5); 21898d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res2_sh_h_8x16b = _mm_srai_epi16(res2_h_8x16b, 5); 21908d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res2_sh_l_8x16b = _mm_srai_epi16(res2_l_8x16b, 5); 21918d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 21928d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S pu1_dst += dst_strd << 1; 21938d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 21948d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res1_16x8b = _mm_packus_epi16(res1_sh_l_8x16b, res1_sh_h_8x16b); 21958d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res2_16x8b = _mm_packus_epi16(res2_sh_l_8x16b, res2_sh_h_8x16b); 21968d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 21978d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storeu_si128((__m128i *)pu1_dst, res1_16x8b); 21988d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), res2_16x8b); 21998d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 22008d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S // rows 13, 14 22018d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res1_h_8x16b = _mm_add_epi16(res1_h_8x16b, c2_8x16b); 22028d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res1_l_8x16b = _mm_add_epi16(res1_l_8x16b, c2_8x16b); 22038d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res2_h_8x16b = _mm_add_epi16(res2_h_8x16b, c2_8x16b); 22048d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res2_l_8x16b = _mm_add_epi16(res2_l_8x16b, c2_8x16b); 22058d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 22068d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res1_sh_h_8x16b = _mm_srai_epi16(res1_h_8x16b, 5); 22078d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res1_sh_l_8x16b = _mm_srai_epi16(res1_l_8x16b, 
5); 22088d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res2_sh_h_8x16b = _mm_srai_epi16(res2_h_8x16b, 5); 22098d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res2_sh_l_8x16b = _mm_srai_epi16(res2_l_8x16b, 5); 22108d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 22118d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S pu1_dst += dst_strd << 1; 22128d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 22138d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res1_16x8b = _mm_packus_epi16(res1_sh_l_8x16b, res1_sh_h_8x16b); 22148d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res2_16x8b = _mm_packus_epi16(res2_sh_l_8x16b, res2_sh_h_8x16b); 22158d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 22168d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storeu_si128((__m128i *)pu1_dst, res1_16x8b); 22178d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), res2_16x8b); 22188d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 22198d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S // rows 15, 16 22208d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res1_h_8x16b = _mm_add_epi16(res1_h_8x16b, c2_8x16b); 22218d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res1_l_8x16b = _mm_add_epi16(res1_l_8x16b, c2_8x16b); 22228d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res2_h_8x16b = _mm_add_epi16(res2_h_8x16b, c2_8x16b); 22238d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res2_l_8x16b = _mm_add_epi16(res2_l_8x16b, c2_8x16b); 22248d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 22258d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res1_sh_h_8x16b = _mm_srai_epi16(res1_h_8x16b, 5); 22268d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res1_sh_l_8x16b = _mm_srai_epi16(res1_l_8x16b, 5); 22278d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res2_sh_h_8x16b = _mm_srai_epi16(res2_h_8x16b, 5); 22288d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res2_sh_l_8x16b = 
_mm_srai_epi16(res2_l_8x16b, 5); 22298d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 22308d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S pu1_dst += dst_strd << 1; 22318d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 22328d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res1_16x8b = _mm_packus_epi16(res1_sh_l_8x16b, res1_sh_h_8x16b); 22338d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S res2_16x8b = _mm_packus_epi16(res2_sh_l_8x16b, res2_sh_h_8x16b); 22348d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S 22358d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storeu_si128((__m128i *)pu1_dst, res1_16x8b); 22368d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), res2_16x8b); 22378d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S } 22388d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S} 2239