18d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S/******************************************************************************
28d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
38d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * Copyright (C) 2015 The Android Open Source Project
48d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
58d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * Licensed under the Apache License, Version 2.0 (the "License");
68d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * you may not use this file except in compliance with the License.
78d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * You may obtain a copy of the License at:
88d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
98d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * http://www.apache.org/licenses/LICENSE-2.0
108d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
118d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * Unless required by applicable law or agreed to in writing, software
128d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * distributed under the License is distributed on an "AS IS" BASIS,
138d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
148d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * See the License for the specific language governing permissions and
158d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * limitations under the License.
168d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
178d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *****************************************************************************
188d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
198d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S*/
/**
 *******************************************************************************
 * @file
 *  ih264_luma_intra_pred_filters_ssse3.c
 *
 * @brief
 *  Contains function definitions for luma intra prediction filters in x86
 *  intrinsics
 *
 * @author
 *  Ittiam
 *
 * @par List of Functions:
 *  - ih264_intra_pred_luma_4x4_mode_vert_ssse3
 *  - ih264_intra_pred_luma_4x4_mode_horz_ssse3
 *  - ih264_intra_pred_luma_4x4_mode_dc_ssse3
 *  - ih264_intra_pred_luma_4x4_mode_diag_dl_ssse3
 *  - ih264_intra_pred_luma_4x4_mode_diag_dr_ssse3
 *  - ih264_intra_pred_luma_4x4_mode_vert_r_ssse3
 *  - ih264_intra_pred_luma_4x4_mode_horz_d_ssse3
 *  - ih264_intra_pred_luma_4x4_mode_vert_l_ssse3
 *  - ih264_intra_pred_luma_4x4_mode_horz_u_ssse3
 *  - ih264_intra_pred_luma_8x8_mode_vert_ssse3
 *  - ih264_intra_pred_luma_8x8_mode_horz_ssse3
 *  - ih264_intra_pred_luma_8x8_mode_dc_ssse3
 *  - ih264_intra_pred_luma_8x8_mode_diag_dl_ssse3
 *  - ih264_intra_pred_luma_8x8_mode_diag_dr_ssse3
 *  - ih264_intra_pred_luma_8x8_mode_vert_r_ssse3
 *  - ih264_intra_pred_luma_8x8_mode_horz_d_ssse3
 *  - ih264_intra_pred_luma_8x8_mode_vert_l_ssse3
 *  - ih264_intra_pred_luma_8x8_mode_horz_u_ssse3
 *  - ih264_intra_pred_luma_16x16_mode_vert_ssse3
 *  - ih264_intra_pred_luma_16x16_mode_horz_ssse3
 *  - ih264_intra_pred_luma_16x16_mode_dc_ssse3
 *  - ih264_intra_pred_luma_16x16_mode_plane_ssse3
 *
 * @remarks
 *  None
 *
 ******************************************************************************
 */
618d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
628d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S/*****************************************************************************/
638d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S/* File Includes                                                             */
648d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S/*****************************************************************************/
658d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S/* System include files */
668d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S#include <stdio.h>
678d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S#include <stddef.h>
688d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S#include <string.h>
698d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S#include <immintrin.h>
708d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
718d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S/* User include files */
728d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S#include "ih264_defs.h"
738d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S#include "ih264_typedefs.h"
748d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S#include "ih264_macros.h"
758d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S#include "ih264_platform_macros.h"
768d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S#include "ih264_intra_pred_filters.h"
778d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
788d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
798d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
808d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S/*******************    LUMA INTRAPREDICTION    *******************/
818d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
828d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S/*******************    4x4 Modes    *******************/
838d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
848d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S/**
858d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *******************************************************************************
868d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
878d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * ih264_intra_pred_luma_4x4_mode_vert_ssse3
888d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
898d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @brief
908d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  Perform Intra prediction for luma_4x4 mode:vertical
918d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
928d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @par Description:
938d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  Perform Intra prediction for luma_4x4 mode:vertical ,described in sec 8.3.1.2.1
948d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
958d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] pu1_src
968d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  UWORD8 pointer to the source
978d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
988d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[out] pu1_dst
998d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  UWORD8 pointer to the destination
1008d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
1018d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] src_strd
1028d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  integer source stride
1038d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
1048d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] dst_strd
1058d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  integer destination stride
1068d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
1078d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] ngbr_avail
1088d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * availability of neighbouring pixels(Not used in this function)
1098d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
1108d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @returns
1118d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
1128d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @remarks
1138d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  None
1148d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
1158d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *******************************************************************************
1168d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S */
1178d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha Svoid ih264_intra_pred_luma_4x4_mode_vert_ssse3(UWORD8 *pu1_src,
1188d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S                                               UWORD8 *pu1_dst,
1198d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S                                               WORD32 src_strd,
1208d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S                                               WORD32 dst_strd,
1218d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S                                               WORD32 ngbr_avail)
1228d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S{
1238d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    UWORD8 *pu1_top;
1248d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    WORD32 dst_strd2, dst_strd3;
125796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    WORD32 i4_top;
1268d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
1278d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    UNUSED(src_strd);
1288d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    UNUSED(ngbr_avail);
1298d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
1308d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    pu1_top = pu1_src + BLK_SIZE + 1;
1318d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
132796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    i4_top = *((WORD32 *)pu1_top);
1338d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
1348d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    dst_strd2 = dst_strd << 1;
1358d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    dst_strd3 = dst_strd + dst_strd2;
1368d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
137796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    *((WORD32 *)(pu1_dst)) = i4_top;
138796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    *((WORD32 *)(pu1_dst + dst_strd)) = i4_top;
139796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    *((WORD32 *)(pu1_dst + dst_strd2)) = i4_top;
140796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    *((WORD32 *)(pu1_dst + dst_strd3)) = i4_top;
1418d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S}
1428d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
1438d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S/**
1448d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *******************************************************************************
1458d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
1468d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *ih264_intra_pred_luma_4x4_mode_horz_ssse3
1478d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
1488d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @brief
1498d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  Perform Intra prediction for luma_4x4 mode:horizontal
1508d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
1518d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @par Description:
1528d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  Perform Intra prediction for luma_4x4 mode:horizontal ,described in sec 8.3.1.2.2
1538d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
1548d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] pu1_src
1558d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  UWORD8 pointer to the source
1568d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
1578d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[out] pu1_dst
1588d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  UWORD8 pointer to the destination
1598d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
1608d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] src_strd
1618d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  integer source stride
1628d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
1638d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] dst_strd
1648d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  integer destination stride
1658d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
1668d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] ngbr_avail
1678d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * availability of neighbouring pixels(Not used in this function)
1688d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
1698d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @returns
1708d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
1718d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @remarks
1728d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  None
1738d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
1748d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *******************************************************************************
1758d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S */
1768d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha Svoid ih264_intra_pred_luma_4x4_mode_horz_ssse3(UWORD8 *pu1_src,
1778d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S                                               UWORD8 *pu1_dst,
1788d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S                                               WORD32 src_strd,
1798d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S                                               WORD32 dst_strd,
1808d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S                                               WORD32 ngbr_avail)
1818d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S{
182796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    UWORD8 *pu1_left = NULL; /* Pointer to start of left predictors */
183796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    WORD32 row1,row2,row3,row4;
184796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    UWORD8 val;
1858d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    WORD32 dst_strd2, dst_strd3;
1868d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
1878d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    UNUSED(src_strd);
1888d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    UNUSED(ngbr_avail);
1898d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    pu1_left = pu1_src + BLK_SIZE - 1;
1908d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
191796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    val  = *pu1_left;
192796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    row1 = val + (val << 8) + (val << 16) + (val << 24);
193796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    val  = *(pu1_left - 1);
194796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    row2 = val + (val << 8) + (val << 16) + (val << 24);
195796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    val  = *(pu1_left - 2);
196796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    row3 = val + (val << 8) + (val << 16) + (val << 24);
197796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    val  = *(pu1_left - 3);
198796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    row4 = val + (val << 8) + (val << 16) + (val << 24);
1998d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
2008d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    dst_strd2 = dst_strd << 1;
2018d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    dst_strd3 = dst_strd + dst_strd2;
2028d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
203796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    *((WORD32 *)(pu1_dst)) = row1;
204796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    *((WORD32 *)(pu1_dst + dst_strd)) = row2;
205796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    *((WORD32 *)(pu1_dst + dst_strd2)) = row3;
206796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    *((WORD32 *)(pu1_dst + dst_strd3)) = row4;
2078d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S}
2088d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
2098d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S/**
2108d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *******************************************************************************
2118d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
2128d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * ih264_intra_pred_luma_4x4_mode_dc_ssse3
2138d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
2148d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @brief
2158d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  Perform Intra prediction for luma_4x4 mode:DC
2168d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
2178d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @par Description:
2188d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  Perform Intra prediction for luma_4x4 mode:DC ,described in sec 8.3.1.2.3
2198d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
2208d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] pu1_src
2218d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  UWORD8 pointer to the source
2228d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
2238d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[out] pu1_dst
2248d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  UWORD8 pointer to the destination
2258d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
2268d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] src_strd
2278d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  integer source stride
2288d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
2298d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] dst_strd
2308d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  integer destination stride
2318d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
2328d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] ngbr_avail
2338d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  availability of neighbouring pixels
2348d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
2358d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @returns
2368d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
2378d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @remarks
2388d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  None
2398d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
2408d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *******************************************************************************/
2418d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha Svoid ih264_intra_pred_luma_4x4_mode_dc_ssse3(UWORD8 *pu1_src,
2428d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S                                             UWORD8 *pu1_dst,
2438d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S                                             WORD32 src_strd,
2448d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S                                             WORD32 dst_strd,
2458d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S                                             WORD32 ngbr_avail)
2468d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S{
2478d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    UWORD8 u1_useleft; /* availability of left predictors (only for DC) */
248796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    UWORD8 u1_usetop; /* availability of top predictors (only for DC) */
249796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    UWORD8 *pu1_left = NULL; /* Pointer to start of left predictors */
250796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    UWORD8 *pu1_top = NULL; /* Pointer to start of top predictors */
2518d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    WORD32 dst_strd2, dst_strd3;
252796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    WORD32 val = 0;
2538d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    UNUSED(src_strd);
2548d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    UNUSED(ngbr_avail);
2558d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    u1_useleft = BOOLEAN(ngbr_avail & LEFT_MB_AVAILABLE_MASK);
2568d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    u1_usetop = BOOLEAN(ngbr_avail & TOP_MB_AVAILABLE_MASK);
2578d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    pu1_top = pu1_src + BLK_SIZE + 1;
258796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    pu1_left = pu1_src + BLK_SIZE - 1;
2598d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
260796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    if(u1_useleft)
2618d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    {
262796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy        val += *pu1_left--;
263796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy        val += *pu1_left--;
264796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy        val += *pu1_left--;
265796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy        val += *pu1_left + 2;
2668d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    }
267796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    if(u1_usetop)
268796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    {
269796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy        val += *pu1_top + *(pu1_top + 1) + *(pu1_top + 2) + *(pu1_top + 3)
270796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy                        + 2;
271796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    }
272796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    /* Since 2 is added if either left/top pred is there,
273796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy     val still being zero implies both preds are not there */
274796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    val = (val) ? (val >> (1 + u1_useleft + u1_usetop)) : 128;
275796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy
276796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    val = val + (val << 8) + (val << 16) + (val << 24);
2778d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
2788d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    dst_strd2 = dst_strd << 1;
2798d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    dst_strd3 = dst_strd + dst_strd2;
2808d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
281796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    *((WORD32 *)(pu1_dst)) = val;
282796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    *((WORD32 *)(pu1_dst + dst_strd)) = val;
283796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    *((WORD32 *)(pu1_dst + dst_strd2)) = val;
284796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    *((WORD32 *)(pu1_dst + dst_strd3)) = val;
2858d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S}
2868d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
2878d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S/**
2888d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *******************************************************************************
2898d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
2908d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * ih264_intra_pred_luma_4x4_mode_diag_dl_ssse3
2918d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
2928d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @brief
2938d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  Perform Intra prediction for luma_4x4 mode:Diagonal_Down_Left
2948d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
2958d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @par Description:
2968d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  Perform Intra prediction for luma_4x4 mode:Diagonal_Down_Left ,described in sec 8.3.1.2.4
2978d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
2988d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] pu1_src
2998d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  UWORD8 pointer to the source
3008d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
3018d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[out] pu1_dst
3028d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  UWORD8 pointer to the destination
3038d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
3048d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] src_strd
3058d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  integer source stride
3068d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
3078d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] dst_strd
3088d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  integer destination stride
3098d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
3108d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] ngbr_avail
3118d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * availability of neighbouring pixels(Not used in this function)
3128d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
3138d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @returns
3148d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
3158d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @remarks
3168d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  None
3178d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
3188d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *******************************************************************************/
3198d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha Svoid ih264_intra_pred_luma_4x4_mode_diag_dl_ssse3(UWORD8 *pu1_src,
3208d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S                                                  UWORD8 *pu1_dst,
3218d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S                                                  WORD32 src_strd,
3228d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S                                                  WORD32 dst_strd,
3238d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S                                                  WORD32 ngbr_avail)
3248d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S{
3258d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    UWORD8 *pu1_top;
3268d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    WORD32 dst_strd2, dst_strd3;
3278d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
3288d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    __m128i top_16x8b, top_8x16b, top_sh_8x16b;
3298d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    __m128i res1_8x16b, res2_8x16b, res_16x8b;
3308d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    __m128i zero_vector, const_2_8x16b;
331796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    WORD32 row1,row2,row3,row4;
3328d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
3338d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    UNUSED(src_strd);
3348d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    UNUSED(ngbr_avail);
3358d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
3368d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    pu1_top = pu1_src + BLK_SIZE + 1;
3378d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
3388d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    top_16x8b = _mm_loadl_epi64((__m128i *)pu1_top);
3398d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    zero_vector = _mm_setzero_si128();
3408d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    top_8x16b = _mm_unpacklo_epi8(top_16x8b, zero_vector);    //t0 t1 t2 t3 t4 t5 t6 t7
3418d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
3428d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    top_sh_8x16b = _mm_srli_si128(top_8x16b, 2);              //t1 t2 t3 t4 t5 t6 t7 0
3438d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    const_2_8x16b = _mm_set1_epi16(2);
3448d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
3458d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    top_sh_8x16b = _mm_shufflehi_epi16(top_sh_8x16b, 0xa4);   //t1 t2 t3 t4 t5 t6 t7 t7
3468d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    res1_8x16b = _mm_add_epi16(top_8x16b, top_sh_8x16b);
3478d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    res2_8x16b = _mm_srli_si128(res1_8x16b, 2);
3488d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
3498d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    res1_8x16b = _mm_add_epi16(res1_8x16b, const_2_8x16b);
3508d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    res1_8x16b = _mm_add_epi16(res2_8x16b, res1_8x16b);
3518d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    res1_8x16b = _mm_srai_epi16(res1_8x16b, 2);
3528d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
3538d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    dst_strd2 = dst_strd << 1;
3548d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    dst_strd3 = dst_strd + dst_strd2;
3558d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
3568d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    res_16x8b = _mm_packus_epi16(res1_8x16b, res1_8x16b);
357796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    row1 = _mm_cvtsi128_si32(res_16x8b);
3588d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    res_16x8b = _mm_srli_si128(res_16x8b, 1);
359796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    row2 = _mm_cvtsi128_si32(res_16x8b);
3608d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    res_16x8b = _mm_srli_si128(res_16x8b, 1);
361796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    row3 = _mm_cvtsi128_si32(res_16x8b);
3628d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    res_16x8b = _mm_srli_si128(res_16x8b, 1);
363796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    row4 = _mm_cvtsi128_si32(res_16x8b);
364796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy
365796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    *((WORD32 *)(pu1_dst)) = row1;
366796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    *((WORD32 *)(pu1_dst + dst_strd)) = row2;
367796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    *((WORD32 *)(pu1_dst + dst_strd2)) = row3;
368796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    *((WORD32 *)(pu1_dst + dst_strd3)) = row4;
3698d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S}
3708d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
3718d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S/**
3728d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *******************************************************************************
3738d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
3748d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * ih264_intra_pred_luma_4x4_mode_diag_dr_ssse3
3758d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
3768d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @brief
3778d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  Perform Intra prediction for luma_4x4 mode:Diagonal_Down_Right
3788d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
3798d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @par Description:
3808d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  Perform Intra prediction for luma_4x4 mode:Diagonal_Down_Right ,described in sec 8.3.1.2.5
3818d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
3828d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] pu1_src
3838d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  UWORD8 pointer to the source
3848d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
3858d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[out] pu1_dst
3868d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  UWORD8 pointer to the destination
3878d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
3888d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] src_strd
3898d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  integer source stride
3908d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
3918d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] dst_strd
3928d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  integer destination stride
3938d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
3948d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] ngbr_avail
3958d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * availability of neighbouring pixels(Not used in this function)
3968d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
3978d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @returns
3988d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
3998d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @remarks
4008d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  None
4018d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
4028d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *******************************************************************************/
4038d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha Svoid ih264_intra_pred_luma_4x4_mode_diag_dr_ssse3(UWORD8 *pu1_src,
4048d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S                                                  UWORD8 *pu1_dst,
4058d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S                                                  WORD32 src_strd,
4068d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S                                                  WORD32 dst_strd,
4078d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S                                                  WORD32 ngbr_avail)
4088d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S{
4098d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    UWORD8 *pu1_left;
4108d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    WORD32 dst_strd2, dst_strd3;
4118d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
4128d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    __m128i top_left_16x8b, top_left_8x16b;
4138d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    __m128i top_left_sh_16x8b, top_left_sh_8x16b;
4148d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    __m128i res1_8x16b, res2_8x16b;
4158d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    __m128i res1_16x8b, res2_16x8b;
4168d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    __m128i zero_vector, const_2_8x16b;
417796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    WORD32 row1,row2,row3,row4;
4188d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
4198d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    UNUSED(src_strd);
4208d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    UNUSED(ngbr_avail);
4218d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
4228d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    pu1_left = pu1_src + BLK_SIZE - 1;
4238d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
4248d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    top_left_16x8b = _mm_loadu_si128((__m128i *)(pu1_left - 3));             //l3 l2 l1 l0 tl t0 t1 t2...
4258d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    zero_vector = _mm_setzero_si128();
4268d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    top_left_sh_16x8b = _mm_srli_si128(top_left_16x8b, 1);                   //l2 l1 l0 tl t0 t1 t2 t3...
4278d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
4288d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    top_left_8x16b = _mm_unpacklo_epi8(top_left_16x8b, zero_vector);
4298d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    top_left_sh_8x16b = _mm_unpacklo_epi8(top_left_sh_16x8b, zero_vector);
4308d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
4318d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    res1_8x16b = _mm_add_epi16(top_left_8x16b, top_left_sh_8x16b);           //l3+l2 l2+l1 l1+l0 l0+tl tl+t0 t0+t1 t1+t2 t2+t3...
4328d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    const_2_8x16b = _mm_set1_epi16(2);
4338d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    res2_8x16b = _mm_srli_si128(res1_8x16b, 2);                              //l2+l1 l1+l0 l0+tl tl+t0 t0+t1 t1+t2 t2+t3...
4348d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
4358d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    res1_8x16b = _mm_add_epi16(res1_8x16b, const_2_8x16b);
4368d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    res1_8x16b = _mm_add_epi16(res2_8x16b, res1_8x16b);                      //l3+2*l2+l1+2 l2+2*l1+l0+2...
4378d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    res1_8x16b = _mm_srai_epi16(res1_8x16b, 2);
4388d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    res1_16x8b = _mm_packus_epi16(res1_8x16b, res1_8x16b);
4398d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
4408d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    dst_strd2 = dst_strd << 1;
4418d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    dst_strd3 = dst_strd + dst_strd2;
4428d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
4438d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    res2_16x8b = _mm_srli_si128(res1_16x8b, 3);
444796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy
445796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    row1 = _mm_cvtsi128_si32(res2_16x8b);
4468d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    res2_16x8b = _mm_srli_si128(res1_16x8b, 2);
447796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    row2 = _mm_cvtsi128_si32(res2_16x8b);
4488d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    res2_16x8b = _mm_srli_si128(res1_16x8b, 1);
449796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    row3 = _mm_cvtsi128_si32(res2_16x8b);
450796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    row4 = _mm_cvtsi128_si32(res1_16x8b);
451796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy
452796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    *((WORD32 *)(pu1_dst)) = row1;
453796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    *((WORD32 *)(pu1_dst + dst_strd)) = row2;
454796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    *((WORD32 *)(pu1_dst + dst_strd2)) = row3;
455796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    *((WORD32 *)(pu1_dst + dst_strd3)) = row4;
4568d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S}
4578d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
4588d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S/**
4598d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *******************************************************************************
4608d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
4618d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * ih264_intra_pred_luma_4x4_mode_vert_r_ssse3
4628d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
4638d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @brief
4648d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  Perform Intra prediction for luma_4x4 mode:Vertical_Right
4658d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
4668d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @par Description:
4678d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  Perform Intra prediction for luma_4x4 mode:Vertical_Right ,described in sec 8.3.1.2.6
4688d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
4698d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] pu1_src
4708d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  UWORD8 pointer to the source
4718d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
4728d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[out] pu1_dst
4738d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  UWORD8 pointer to the destination
4748d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
4758d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] src_strd
4768d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  integer source stride
4778d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
4788d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] dst_strd
4798d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  integer destination stride
4808d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
4818d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] ngbr_avail
4828d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * availability of neighbouring pixels(Not used in this function)
4838d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
4848d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @returns
4858d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
4868d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @remarks
4878d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  None
4888d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
4898d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *******************************************************************************/
4908d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha Svoid ih264_intra_pred_luma_4x4_mode_vert_r_ssse3(UWORD8 *pu1_src,
4918d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S                                                 UWORD8 *pu1_dst,
4928d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S                                                 WORD32 src_strd,
4938d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S                                                 WORD32 dst_strd,
4948d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S                                                 WORD32 ngbr_avail)
4958d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S{
4968d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    UWORD8 *pu1_left;
4978d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    WORD32 dst_strd2, dst_strd3;
4988d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
4998d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    __m128i val_16x8b, temp_16x8b;
5008d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    __m128i w11_a1_16x8b, w11_a2_16x8b;
5018d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    __m128i w121_a1_8x16b, w121_a2_8x16b, w121_sh_8x16b;
5028d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    __m128i row1_16x8b, row2_16x8b, row3_16x8b, row4_16x8b;
5038d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    __m128i zero_vector, const_2_8x16b;
504796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    WORD32 row1,row2,row3,row4;
5058d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
5068d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    UNUSED(src_strd);
5078d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    UNUSED(ngbr_avail);
5088d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
5098d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    pu1_left = pu1_src + BLK_SIZE - 1;
5108d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
5118d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    val_16x8b = _mm_loadl_epi64((__m128i *)(pu1_left - 2));
5128d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    zero_vector = _mm_setzero_si128();
5138d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
5148d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    w121_a1_8x16b = _mm_unpacklo_epi8(val_16x8b, zero_vector);        //l2 l1 l0 tl t0 t1 t2 t3
5158d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    w11_a1_16x8b = _mm_srli_si128(val_16x8b, 3);
5168d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    w121_a2_8x16b = _mm_srli_si128(w121_a1_8x16b, 2);                 //l1 l0 tl t0 t1 t2 t3 0
5178d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    w11_a2_16x8b = _mm_srli_si128(val_16x8b, 4);
5188d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
5198d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    w121_a1_8x16b = _mm_add_epi16(w121_a1_8x16b, w121_a2_8x16b);      //l2+l1 l1+l0 l0+tl tl+t0 t0+t1 t1+t2 t2+t3 t3
5208d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    row1_16x8b = _mm_avg_epu8(w11_a1_16x8b, w11_a2_16x8b);
5218d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    w121_a2_8x16b = _mm_srli_si128(w121_a1_8x16b, 2);                 //l1+l0 l0+tl tl+t0 t0+t1 t1+t2 t2+t3 t3    0
5228d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
5238d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    const_2_8x16b = _mm_set1_epi16(2);
5248d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    w121_a1_8x16b = _mm_add_epi16(w121_a1_8x16b, w121_a2_8x16b);      //l2+2*l1+l0 l1+2*l0+tl ...
5258d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    w121_a1_8x16b = _mm_add_epi16(w121_a1_8x16b, const_2_8x16b);
5268d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    w121_a1_8x16b = _mm_srai_epi16(w121_a1_8x16b, 2);
5278d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
5288d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    w121_sh_8x16b = _mm_shufflelo_epi16(w121_a1_8x16b, 0xe1);
5298d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    w121_sh_8x16b = _mm_srli_si128(w121_sh_8x16b, 2);
5308d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
5318d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    row4_16x8b = _mm_packus_epi16(w121_sh_8x16b, w121_sh_8x16b);
5328d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    temp_16x8b = _mm_slli_si128(w121_a1_8x16b, 13);
5338d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    row2_16x8b = _mm_srli_si128(row4_16x8b, 1);
5348d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    row3_16x8b = _mm_alignr_epi8(row1_16x8b, temp_16x8b, 15);
5358d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
5368d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    dst_strd2 = dst_strd << 1;
5378d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    dst_strd3 = dst_strd + dst_strd2;
5388d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
539796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    row1 = _mm_cvtsi128_si32(row1_16x8b);
540796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    row2 = _mm_cvtsi128_si32(row2_16x8b);
541796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    row3 = _mm_cvtsi128_si32(row3_16x8b);
542796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    row4 = _mm_cvtsi128_si32(row4_16x8b);
543796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy
544796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    *((WORD32 *)(pu1_dst)) = row1;
545796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    *((WORD32 *)(pu1_dst + dst_strd)) = row2;
546796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    *((WORD32 *)(pu1_dst + dst_strd2)) = row3;
547796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    *((WORD32 *)(pu1_dst + dst_strd3)) = row4;
5488d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S}
5498d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
5508d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S/*
5518d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *******************************************************************************
5528d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
5538d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * ih264_intra_pred_luma_4x4_mode_horz_d_ssse3
5548d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
5558d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @brief
5568d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  Perform Intra prediction for luma_4x4 mode:Horizontal_Down
5578d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
5588d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @par Description:
5598d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  Perform Intra prediction for luma_4x4 mode:Horizontal_Down ,described in sec 8.3.1.2.7
5608d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
5618d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] pu1_src
5628d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  UWORD8 pointer to the source
5638d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
5648d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[out] pu1_dst
5658d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  UWORD8 pointer to the destination
5668d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
5678d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] src_strd
5688d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  integer source stride
5698d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
5708d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] dst_strd
5718d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  integer destination stride
5728d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
5738d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] ngbr_avail
5748d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * availability of neighbouring pixels(Not used in this function)
5758d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
5768d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @returns
5778d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
5788d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @remarks
5798d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  None
5808d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
5818d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *******************************************************************************/
5828d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha Svoid ih264_intra_pred_luma_4x4_mode_horz_d_ssse3(UWORD8 *pu1_src,
5838d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S                                           UWORD8 *pu1_dst,
5848d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S                                           WORD32 src_strd,
5858d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S                                           WORD32 dst_strd,
5868d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S                                           WORD32 ngbr_avail)
5878d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S{
5888d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    UWORD8 *pu1_left;
5898d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    WORD32 dst_strd2, dst_strd3;
5908d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    WORD32 val_121_t0t1;
5918d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
5928d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    __m128i val_16x8b, val_sh_16x8b;
5938d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    __m128i w11_16x8b;
5948d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    __m128i w121_a1_8x16b, w121_a2_8x16b, w121_16x8b;
5958d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    __m128i row1_16x8b, row2_16x8b, row3_16x8b, row4_16x8b;
5968d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
5978d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    __m128i zero_vector, const_2_8x16b;
598796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    WORD32 row1,row2,row3,row4;
5998d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
6008d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    UNUSED(src_strd);
6018d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    UNUSED(ngbr_avail);
6028d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
6038d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    pu1_left = pu1_src + BLK_SIZE - 1;
6048d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
6058d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    val_16x8b = _mm_loadl_epi64((__m128i *)(pu1_left - 3));
6068d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    zero_vector = _mm_setzero_si128();
6078d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    val_sh_16x8b = _mm_srli_si128(val_16x8b, 1);
6088d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    w11_16x8b = _mm_avg_epu8(val_16x8b, val_sh_16x8b);
6098d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
6108d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    w121_a1_8x16b = _mm_unpacklo_epi8(val_16x8b, zero_vector);        //l3 l2 l1 l0 tl t0 t1 t2
6118d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    w121_a2_8x16b = _mm_srli_si128(w121_a1_8x16b, 2);                 //l2 l1 l0 tl t0 t1 t2 0
6128d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    w121_a1_8x16b = _mm_add_epi16(w121_a1_8x16b, w121_a2_8x16b);      //l3+l2 l2+l1 l1+l0 l0+tl tl+t0 t0+t1 t1+t2 t2
6138d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    w121_a2_8x16b = _mm_srli_si128(w121_a1_8x16b, 2);                 //l2+l1 l1+l0 l0+tl tl+t0 t0+t1 t1+t2 t2    0
6148d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
6158d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    zero_vector = _mm_setzero_si128();
6168d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    const_2_8x16b = _mm_set1_epi16(2);
6178d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
6188d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    w121_a1_8x16b = _mm_add_epi16(w121_a1_8x16b, w121_a2_8x16b);      //l3+2*l2+l1 l2+2*l1+l0 l1+2*l0+tl ...
6198d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    w121_a1_8x16b = _mm_add_epi16(w121_a1_8x16b, const_2_8x16b);
6208d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    w121_a1_8x16b = _mm_srai_epi16(w121_a1_8x16b, 2);
6218d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
6228d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    w121_16x8b = _mm_packus_epi16(w121_a1_8x16b, w121_a1_8x16b);
6238d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
6248d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    row4_16x8b = _mm_unpacklo_epi8(w11_16x8b, w121_16x8b);
6258d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    val_121_t0t1 = _mm_extract_epi16(w121_16x8b, 2);
6268d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    row4_16x8b = _mm_insert_epi16(row4_16x8b, val_121_t0t1, 4);
6278d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
6288d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    dst_strd2 = dst_strd << 1;
6298d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    dst_strd3 = dst_strd + dst_strd2;
6308d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
6318d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    row1_16x8b = _mm_srli_si128(row4_16x8b, 6);
6328d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    row2_16x8b = _mm_srli_si128(row4_16x8b, 4);
6338d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    row3_16x8b = _mm_srli_si128(row4_16x8b, 2);
6348d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
635796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    row1 = _mm_cvtsi128_si32(row1_16x8b);
636796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    row2 = _mm_cvtsi128_si32(row2_16x8b);
637796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    row3 = _mm_cvtsi128_si32(row3_16x8b);
638796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    row4 = _mm_cvtsi128_si32(row4_16x8b);
639796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy
640796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    *((WORD32 *)(pu1_dst)) = row1;
641796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    *((WORD32 *)(pu1_dst + dst_strd)) = row2;
642796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    *((WORD32 *)(pu1_dst + dst_strd2)) = row3;
643796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    *((WORD32 *)(pu1_dst + dst_strd3)) = row4;
6448d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S}
6458d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
6468d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S/**
6478d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *******************************************************************************
6488d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
6498d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * ih264_intra_pred_luma_4x4_mode_vert_l_ssse3
6508d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
6518d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @brief
6528d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  Perform Intra prediction for luma_4x4 mode:Vertical_Left
6538d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
6548d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @par Description:
6558d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  Perform Intra prediction for luma_4x4 mode:Vertical_Left ,described in sec 8.3.1.2.8
6568d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
6578d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] pu1_src
6588d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  UWORD8 pointer to the source
6598d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
6608d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[out] pu1_dst
6618d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  UWORD8 pointer to the destination
6628d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
6638d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] src_strd
6648d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  integer source stride
6658d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
6668d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] dst_strd
6678d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  integer destination stride
6688d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
6698d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] ngbr_avail
6708d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * availability of neighbouring pixels(Not used in this function)
6718d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
6728d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @returns
6738d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
6748d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @remarks
6758d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  None
6768d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
6778d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *******************************************************************************/
6788d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha Svoid ih264_intra_pred_luma_4x4_mode_vert_l_ssse3(UWORD8 *pu1_src,
6798d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S                                                 UWORD8 *pu1_dst,
6808d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S                                                 WORD32 src_strd,
6818d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S                                                 WORD32 dst_strd,
6828d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S                                                 WORD32 ngbr_avail)
6838d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S{
6848d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    UWORD8 *pu1_top;
6858d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    WORD32 dst_strd2, dst_strd3;
6868d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
6878d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    __m128i val_16x8b, val_sh_16x8b;
6888d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    __m128i w121_a1_8x16b, w121_a2_8x16b;
6898d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    __m128i row1_16x8b, row2_16x8b, row3_16x8b, row4_16x8b;
6908d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
6918d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    __m128i zero_vector, const_2_8x16b;
692796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    WORD32 row1,row2,row3,row4;
6938d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
6948d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    UNUSED(src_strd);
6958d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    UNUSED(ngbr_avail);
6968d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
6978d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    pu1_top = pu1_src +BLK_SIZE + 1;
6988d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
6998d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    val_16x8b = _mm_loadl_epi64((__m128i *)pu1_top);
7008d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    zero_vector = _mm_setzero_si128();
7018d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    val_sh_16x8b = _mm_srli_si128(val_16x8b, 1);
7028d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    row1_16x8b = _mm_avg_epu8(val_16x8b, val_sh_16x8b);
7038d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
7048d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    w121_a1_8x16b = _mm_unpacklo_epi8(val_16x8b, zero_vector);        //t0 t1 t2 t3 t4 t5...
7058d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    w121_a2_8x16b = _mm_srli_si128(w121_a1_8x16b, 2);                 //t1 t2 t3 t4 t5 t6...
7068d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    w121_a1_8x16b = _mm_add_epi16(w121_a1_8x16b, w121_a2_8x16b);      //t0+t1 t1+t2 t2+t3 t3+t4 t4+t5...
7078d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    w121_a2_8x16b = _mm_srli_si128(w121_a1_8x16b, 2);                 //t1+t2 t2+t3 t3+t4 t4+t5 t5+t6...
7088d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
7098d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    zero_vector = _mm_setzero_si128();
7108d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    const_2_8x16b = _mm_set1_epi16(2);
7118d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
7128d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    w121_a1_8x16b = _mm_add_epi16(w121_a1_8x16b, w121_a2_8x16b);      //t0+2*t1+t2 t1+2*t2+t3 t2+2*t3+t4...
7138d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    w121_a1_8x16b = _mm_add_epi16(w121_a1_8x16b, const_2_8x16b);
7148d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    w121_a1_8x16b = _mm_srai_epi16(w121_a1_8x16b, 2);
7158d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
7168d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    row2_16x8b = _mm_packus_epi16(w121_a1_8x16b, w121_a1_8x16b);
7178d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
7188d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    dst_strd2 = dst_strd << 1;
7198d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    dst_strd3 = dst_strd + dst_strd2;
7208d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
7218d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    row3_16x8b = _mm_srli_si128(row1_16x8b, 1);
7228d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    row4_16x8b = _mm_srli_si128(row2_16x8b, 1);
7238d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
724796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    row1 = _mm_cvtsi128_si32(row1_16x8b);
725796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    row2 = _mm_cvtsi128_si32(row2_16x8b);
726796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    row3 = _mm_cvtsi128_si32(row3_16x8b);
727796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    row4 = _mm_cvtsi128_si32(row4_16x8b);
728796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy
729796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    *((WORD32 *)(pu1_dst)) = row1;
730796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    *((WORD32 *)(pu1_dst + dst_strd)) = row2;
731796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    *((WORD32 *)(pu1_dst + dst_strd2)) = row3;
732796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    *((WORD32 *)(pu1_dst + dst_strd3)) = row4;
7338d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S}
7348d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
7358d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S/**
7368d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *******************************************************************************
7378d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
7388d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * ih264_intra_pred_luma_4x4_mode_horz_u_ssse3
7398d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
7408d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @brief
7418d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  Perform Intra prediction for luma_4x4 mode:Horizontal_Up
7428d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
7438d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @par Description:
7448d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  Perform Intra prediction for luma_4x4 mode:Horizontal_Up ,described in sec 8.3.1.2.9
7458d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
7468d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] pu1_src
7478d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  UWORD8 pointer to the source
7488d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
7498d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[out] pu1_dst
7508d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  UWORD8 pointer to the destination
7518d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
7528d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] src_strd
7538d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  integer source stride
7548d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
7558d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] dst_strd
7568d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  integer destination stride
7578d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
7588d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] ngbr_avail
7598d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * availability of neighbouring pixels(Not used in this function)
7608d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
7618d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @returns
7628d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
7638d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @remarks
7648d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  None
7658d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
7668d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *******************************************************************************/
7678d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha Svoid ih264_intra_pred_luma_4x4_mode_horz_u_ssse3(UWORD8 *pu1_src,
7688d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S                                                 UWORD8 *pu1_dst,
7698d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S                                                 WORD32 src_strd,
7708d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S                                                 WORD32 dst_strd,
7718d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S                                                 WORD32 ngbr_avail)
7728d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S{
7738d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    UWORD8 *pu1_left;
7748d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    WORD32 dst_strd2, dst_strd3;
7758d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
7768d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    __m128i val_16x8b, val_sh_16x8b;
7778d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    __m128i w11_16x8b;
7788d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    __m128i w121_a1_8x16b, w121_a2_8x16b, w121_16x8b;
7798d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    __m128i row1_16x8b, row2_16x8b, row3_16x8b, row4_16x8b;
7808d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
7818d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    __m128i zero_vector, const_2_8x16b, rev_16x8b;
782796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    WORD32 row1,row2,row3,row4;
7838d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
7848d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    UNUSED(src_strd);
7858d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    UNUSED(ngbr_avail);
7868d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
7878d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    pu1_left = pu1_src + BLK_SIZE - 1;
7888d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
7898d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    zero_vector = _mm_setzero_si128();
7908d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    rev_16x8b = _mm_setr_epi8(3, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
7918d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
7928d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    val_16x8b = _mm_loadl_epi64((__m128i *)(pu1_left - 3));           //l3 l2 l1 l0 0  0  0...
7938d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    val_16x8b = _mm_shuffle_epi8(val_16x8b, rev_16x8b);                //l0 l1 l2 l3 l3 l3 l3...
7948d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
7958d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    val_sh_16x8b = _mm_srli_si128(val_16x8b, 1);
7968d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    w11_16x8b = _mm_avg_epu8(val_16x8b, val_sh_16x8b);
7978d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
7988d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    w121_a1_8x16b = _mm_unpacklo_epi8(val_16x8b, zero_vector);        //l0 l1 l2 l3 l3 l3...
7998d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    w121_a2_8x16b = _mm_srli_si128(w121_a1_8x16b, 2);                 //l1 l2 l3 l3 l3 l3...
8008d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
8018d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    w121_a1_8x16b = _mm_add_epi16(w121_a1_8x16b, w121_a2_8x16b);      //l0+t1 l1+l2 l2+l3 2*l3 2*l3...
8028d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    w121_a2_8x16b = _mm_srli_si128(w121_a1_8x16b, 2);                 //l1+t2 l2+l3 2*l3  2*l3 2*l3...
8038d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
8048d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    zero_vector = _mm_setzero_si128();
8058d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    const_2_8x16b = _mm_set1_epi16(2);
8068d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
8078d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    w121_a1_8x16b = _mm_add_epi16(w121_a1_8x16b, w121_a2_8x16b);      //l0+2*l1+l2 l1+2*l2+l3 l2+3*l3 4*l3 4*l3...
8088d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    w121_a1_8x16b = _mm_add_epi16(w121_a1_8x16b, const_2_8x16b);
8098d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    w121_a1_8x16b = _mm_srai_epi16(w121_a1_8x16b, 2);
8108d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
8118d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    w121_16x8b = _mm_packus_epi16(w121_a1_8x16b, w121_a1_8x16b);
8128d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
8138d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    dst_strd2 = dst_strd << 1;
8148d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    dst_strd3 = dst_strd + dst_strd2;
8158d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
8168d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    row1_16x8b = _mm_unpacklo_epi8(w11_16x8b, w121_16x8b);
8178d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    row2_16x8b = _mm_srli_si128(row1_16x8b, 2);
8188d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    row3_16x8b = _mm_srli_si128(row1_16x8b, 4);
8198d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    row4_16x8b = _mm_srli_si128(row1_16x8b, 6);
8208d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
821796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    row1 = _mm_cvtsi128_si32(row1_16x8b);
822796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    row2 = _mm_cvtsi128_si32(row2_16x8b);
823796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    row3 = _mm_cvtsi128_si32(row3_16x8b);
824796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    row4 = _mm_cvtsi128_si32(row4_16x8b);
825796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy
826796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    *((WORD32 *)(pu1_dst)) = row1;
827796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    *((WORD32 *)(pu1_dst + dst_strd)) = row2;
828796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    *((WORD32 *)(pu1_dst + dst_strd2)) = row3;
829796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    *((WORD32 *)(pu1_dst + dst_strd3)) = row4;
8308d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S}
8318d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
8328d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S/*******************    8x8 Modes    *******************/
8338d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
8348d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S/**
8358d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *******************************************************************************
8368d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
8378d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * ih264_intra_pred_luma_8x8_mode_vert_ssse3
8388d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
8398d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @brief
8408d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  Perform Intra prediction for luma_8x8 mode:vertical
8418d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
8428d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @par Description:
8438d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  Perform Intra prediction for luma_8x8 mode:vertical ,described in sec 8.3.2.2.2
8448d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
8458d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] pu1_src
8468d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  UWORD8 pointer to the source
8478d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
8488d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[out] pu1_dst
8498d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  UWORD8 pointer to the destination
8508d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
8518d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] src_strd
8528d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  integer source stride
8538d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
8548d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] dst_strd
8558d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  integer destination stride
8568d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
8578d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] ngbr_avail
8588d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * availability of neighbouring pixels(Not used in this function)
8598d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
8608d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @returns
8618d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
8628d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @remarks
8638d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  None
8648d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
8658d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *******************************************************************************
8668d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S */
8678d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha Svoid ih264_intra_pred_luma_8x8_mode_vert_ssse3(UWORD8 *pu1_src,
8688d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S                                               UWORD8 *pu1_dst,
8698d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S                                               WORD32 src_strd,
8708d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S                                               WORD32 dst_strd,
8718d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S                                               WORD32 ngbr_avail)
8728d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S{
8738d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    UWORD8 *pu1_top = NULL;
8748d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    __m128i top_8x8b;
8758d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    UNUSED(src_strd);
8768d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    UNUSED(ngbr_avail);
8778d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    pu1_top = pu1_src + BLK8x8SIZE + 1;
8788d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
8798d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    top_8x8b = _mm_loadl_epi64((__m128i *)pu1_top);
8808d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
8818d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storel_epi64((__m128i *)(pu1_dst + 0 * dst_strd), top_8x8b);
8828d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storel_epi64((__m128i *)(pu1_dst + 1 * dst_strd), top_8x8b);
8838d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storel_epi64((__m128i *)(pu1_dst + 2 * dst_strd), top_8x8b);
8848d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storel_epi64((__m128i *)(pu1_dst + 3 * dst_strd), top_8x8b);
8858d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storel_epi64((__m128i *)(pu1_dst + 4 * dst_strd), top_8x8b);
8868d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storel_epi64((__m128i *)(pu1_dst + 5 * dst_strd), top_8x8b);
8878d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storel_epi64((__m128i *)(pu1_dst + 6 * dst_strd), top_8x8b);
8888d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storel_epi64((__m128i *)(pu1_dst + 7 * dst_strd), top_8x8b);
8898d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S}
8908d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
8918d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S/**
8928d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *******************************************************************************
8938d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
8948d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * ih264_intra_pred_luma_8x8_mode_horz_ssse3
8958d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
8968d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @brief
8978d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  Perform Intra prediction for luma_8x8 mode:horizontal
8988d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
8998d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @par Description:
9008d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  Perform Intra prediction for  uma_8x8 mode:horizontal ,described in sec 8.3.2.2.2
9018d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
9028d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] pu1_src
9038d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  UWORD8 pointer to the source
9048d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
9058d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[out] pu1_dst
9068d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  UWORD8 pointer to the destination
9078d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
9088d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] src_strd
9098d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  integer source stride
9108d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
9118d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] dst_strd
9128d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  integer destination stride
9138d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
9148d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] ngbr_avail
9158d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * availability of neighbouring pixels(Not used in this function)
9168d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
9178d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @returns
9188d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
9198d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @remarks
9208d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  None
9218d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
9228d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *******************************************************************************
9238d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S */
9248d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha Svoid ih264_intra_pred_luma_8x8_mode_horz_ssse3(UWORD8 *pu1_src,
9258d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S                                               UWORD8 *pu1_dst,
9268d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S                                               WORD32 src_strd,
9278d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S                                               WORD32 dst_strd,
9288d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S                                               WORD32 ngbr_avail)
9298d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S{
9308d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    UWORD8 *pu1_left = pu1_src + BLK8x8SIZE - 1;
9318d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    __m128i row1_8x8b, row2_8x8b, row3_8x8b, row4_8x8b;
9328d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    __m128i row5_8x8b, row6_8x8b, row7_8x8b, row8_8x8b;
9338d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
9348d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    UNUSED(src_strd);
9358d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    UNUSED(ngbr_avail);
9368d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
9378d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    row1_8x8b = _mm_set1_epi8(pu1_left[0]);
9388d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    row2_8x8b = _mm_set1_epi8(pu1_left[-1]);
9398d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    row3_8x8b = _mm_set1_epi8(pu1_left[-2]);
9408d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    row4_8x8b = _mm_set1_epi8(pu1_left[-3]);
9418d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    row5_8x8b = _mm_set1_epi8(pu1_left[-4]);
9428d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    row6_8x8b = _mm_set1_epi8(pu1_left[-5]);
9438d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    row7_8x8b = _mm_set1_epi8(pu1_left[-6]);
9448d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    row8_8x8b = _mm_set1_epi8(pu1_left[-7]);
9458d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
9468d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storel_epi64((__m128i *)(pu1_dst + 0 * dst_strd), row1_8x8b);
9478d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storel_epi64((__m128i *)(pu1_dst + 1 * dst_strd), row2_8x8b);
9488d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storel_epi64((__m128i *)(pu1_dst + 2 * dst_strd), row3_8x8b);
9498d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storel_epi64((__m128i *)(pu1_dst + 3 * dst_strd), row4_8x8b);
9508d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storel_epi64((__m128i *)(pu1_dst + 4 * dst_strd), row5_8x8b);
9518d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storel_epi64((__m128i *)(pu1_dst + 5 * dst_strd), row6_8x8b);
9528d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storel_epi64((__m128i *)(pu1_dst + 6 * dst_strd), row7_8x8b);
9538d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storel_epi64((__m128i *)(pu1_dst + 7 * dst_strd), row8_8x8b);
9548d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S}
9558d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
9568d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S/**
9578d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *******************************************************************************
9588d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
9598d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * ih264_intra_pred_luma_8x8_mode_dc_ssse3
9608d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
9618d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @brief
9628d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  Perform Intra prediction for luma_8x8 mode:DC
9638d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
9648d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @par Description:
9658d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  Perform Intra prediction for luma_8x8 mode:DC ,described in sec 8.3.2.2.4
9668d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
9678d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] pu1_src
9688d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  UWORD8 pointer to the source
9698d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
9708d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[out] pu1_dst
9718d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  UWORD8 pointer to the destination
9728d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
9738d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] src_strd
9748d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  integer source stride
9758d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
9768d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] dst_strd
9778d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  integer destination stride
9788d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
9798d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] ngbr_avail
9808d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  availability of neighbouring pixels
9818d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
9828d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @returns
9838d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
9848d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @remarks
9858d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  None
9868d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
9878d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *******************************************************************************/
9888d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha Svoid ih264_intra_pred_luma_8x8_mode_dc_ssse3(UWORD8 *pu1_src,
9898d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S                                             UWORD8 *pu1_dst,
9908d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S                                             WORD32 src_strd,
9918d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S                                             WORD32 dst_strd,
9928d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S                                             WORD32 ngbr_avail)
9938d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S{
9948d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    UWORD8 u1_useleft; /* availability of left predictors (only for DC) */
9958d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    UWORD8 u1_usetop; /* availability of top predictors (only for DC) */
9968d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    UWORD8 *pu1_left = NULL; /* Pointer to start of left predictors */
9978d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    UWORD8 *pu1_top = NULL; /* Pointer to start of top predictors */
9988d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    __m128i dc_val_8x8b;
9998d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    WORD32 dc_val = 0;
10008d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    UNUSED(src_strd);
10018d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
10028d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    u1_useleft = BOOLEAN(ngbr_avail & LEFT_MB_AVAILABLE_MASK);
10038d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    u1_usetop = BOOLEAN(ngbr_avail & TOP_MB_AVAILABLE_MASK);
10048d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    pu1_top = pu1_src + BLK8x8SIZE + 1;
10058d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    pu1_left = pu1_src + BLK8x8SIZE - 1;
10068d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
10078d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    if(u1_useleft || u1_usetop)
10088d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    {
10098d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        WORD32 shft = 2;
10108d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        __m128i val_8x8b, zero_8x8b, sum_8x16b;
10118d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
10128d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        zero_8x8b = _mm_setzero_si128();
10138d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
10148d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        if(u1_useleft)
10158d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        {
10168d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S            val_8x8b = _mm_loadl_epi64((__m128i *)(pu1_left - 7));
10178d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S            sum_8x16b = _mm_sad_epu8(zero_8x8b, val_8x8b);
10188d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
10198d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S            shft++;
10208d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S            dc_val += 4;
10218d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S            dc_val += _mm_extract_epi16(sum_8x16b, 0);
10228d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        }
10238d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        if(u1_usetop)
10248d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        {
10258d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S            val_8x8b = _mm_loadl_epi64((__m128i *)pu1_top);
10268d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S            sum_8x16b = _mm_sad_epu8(zero_8x8b, val_8x8b);
10278d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
10288d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S            shft++;
10298d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S            dc_val += 4;
10308d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S            dc_val += _mm_extract_epi16(sum_8x16b, 0);
10318d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        }
10328d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        dc_val = dc_val >> shft;
10338d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    }
10348d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    else
10358d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        dc_val = 128;
10368d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
10378d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    dc_val_8x8b = _mm_set1_epi8(dc_val);
10388d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
10398d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storel_epi64((__m128i *)(pu1_dst + 0 * dst_strd), dc_val_8x8b);
10408d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storel_epi64((__m128i *)(pu1_dst + 1 * dst_strd), dc_val_8x8b);
10418d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storel_epi64((__m128i *)(pu1_dst + 2 * dst_strd), dc_val_8x8b);
10428d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storel_epi64((__m128i *)(pu1_dst + 3 * dst_strd), dc_val_8x8b);
10438d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storel_epi64((__m128i *)(pu1_dst + 4 * dst_strd), dc_val_8x8b);
10448d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storel_epi64((__m128i *)(pu1_dst + 5 * dst_strd), dc_val_8x8b);
10458d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storel_epi64((__m128i *)(pu1_dst + 6 * dst_strd), dc_val_8x8b);
10468d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storel_epi64((__m128i *)(pu1_dst + 7 * dst_strd), dc_val_8x8b);
10478d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S}
10488d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
10498d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S/**
10508d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *******************************************************************************
10518d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
10528d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * ih264_intra_pred_luma_8x8_mode_diag_dl_ssse3
10538d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
10548d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @brief
10558d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  Perform Intra prediction for luma_8x8 mode:Diagonal_Down_Left
10568d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
10578d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @par Description:
10588d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  Perform Intra prediction for luma_8x8 mode:Diagonal_Down_Left ,described in sec 8.3.2.2.5
10598d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
10608d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] pu1_src
10618d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  UWORD8 pointer to the source
10628d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
10638d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[out] pu1_dst
10648d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  UWORD8 pointer to the destination
10658d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
10668d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] src_strd
10678d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  integer source stride
10688d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
10698d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] dst_strd
10708d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  integer destination stride
10718d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
10728d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] ngbr_avail
10738d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * availability of neighbouring pixels(Not used in this function)
10748d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
10758d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @returns
10768d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
10778d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @remarks
10788d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  None
10798d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
10808d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *******************************************************************************/
10818d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha Svoid ih264_intra_pred_luma_8x8_mode_diag_dl_ssse3(UWORD8 *pu1_src,
10828d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S                                                  UWORD8 *pu1_dst,
10838d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S                                                  WORD32 src_strd,
10848d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S                                                  WORD32 dst_strd,
10858d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S                                                  WORD32 ngbr_avail)
10868d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S{
10878d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    UWORD8 *pu1_top = NULL; /* Pointer to start of top predictors */
10888d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    __m128i top_16x8;
10898d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    __m128i out_15x16;
10908d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    __m128i a0_8x16, a1_8x16, a2_8x16;
10918d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    __m128i temp1, temp2;
10928d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    __m128i res1_8x16, res2_8x16;
10938d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    __m128i zero = _mm_setzero_si128();
10948d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    __m128i const_val2_8x16 = _mm_set1_epi16(2);
10958d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
10968d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    UNUSED(src_strd);
10978d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    UNUSED(ngbr_avail);
10988d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
10998d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    pu1_top = pu1_src + BLK8x8SIZE + 1;
11008d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
11018d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    top_16x8 = _mm_loadu_si128((__m128i *)(pu1_top));
11028d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
11038d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    temp1 = _mm_srli_si128(top_16x8, 1);
11048d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    temp2 = _mm_srli_si128(top_16x8, 2);
11058d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    a0_8x16 = _mm_unpacklo_epi8(top_16x8, zero);
11068d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    a1_8x16 = _mm_unpacklo_epi8(temp1, zero);
11078d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    a2_8x16 = _mm_unpacklo_epi8(temp2, zero);
11088d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
11098d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    a0_8x16 = _mm_add_epi16(a0_8x16, a2_8x16);
11108d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    a1_8x16 = _mm_add_epi16(a1_8x16, a1_8x16);
11118d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    a0_8x16 = _mm_add_epi16(a0_8x16, const_val2_8x16);
11128d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    a0_8x16 = _mm_add_epi16(a0_8x16, a1_8x16);
11138d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    res1_8x16 = _mm_srai_epi16(a0_8x16, 2);
11148d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
11158d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    temp2 = _mm_srli_si128(top_16x8, 2);
11168d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    temp1 = _mm_srli_si128(top_16x8, 1);
11178d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    a2_8x16 = _mm_unpackhi_epi8(temp2, zero);
11188d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    a0_8x16 = _mm_unpackhi_epi8(top_16x8, zero);
11198d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    a2_8x16 = _mm_shufflehi_epi16(a2_8x16, 0x14);
11208d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    a1_8x16 = _mm_unpackhi_epi8(temp1, zero);
11218d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
11228d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    a0_8x16 = _mm_add_epi16(a0_8x16, a2_8x16);
11238d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    a1_8x16 = _mm_add_epi16(a1_8x16, a1_8x16);
11248d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    a0_8x16 = _mm_add_epi16(a0_8x16, const_val2_8x16);
11258d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    a0_8x16 = _mm_add_epi16(a0_8x16, a1_8x16);
11268d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    res2_8x16 = _mm_srai_epi16(a0_8x16, 2);
11278d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
11288d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    out_15x16 = _mm_packus_epi16(res1_8x16, res2_8x16);
11298d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
11308d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storel_epi64((__m128i *)(pu1_dst + 0 * dst_strd), out_15x16);
11318d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    out_15x16 = _mm_srli_si128(out_15x16, 1);
11328d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storel_epi64((__m128i *)(pu1_dst + 1 * dst_strd), out_15x16);
11338d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    out_15x16 = _mm_srli_si128(out_15x16, 1);
11348d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storel_epi64((__m128i *)(pu1_dst + 2 * dst_strd), out_15x16);
11358d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    out_15x16 = _mm_srli_si128(out_15x16, 1);
11368d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storel_epi64((__m128i *)(pu1_dst + 3 * dst_strd), out_15x16);
11378d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    out_15x16 = _mm_srli_si128(out_15x16, 1);
11388d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storel_epi64((__m128i *)(pu1_dst + 4 * dst_strd), out_15x16);
11398d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    out_15x16 = _mm_srli_si128(out_15x16, 1);
11408d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storel_epi64((__m128i *)(pu1_dst + 5 * dst_strd), out_15x16);
11418d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    out_15x16 = _mm_srli_si128(out_15x16, 1);
11428d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storel_epi64((__m128i *)(pu1_dst + 6 * dst_strd), out_15x16);
11438d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    out_15x16 = _mm_srli_si128(out_15x16, 1);
11448d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storel_epi64((__m128i *)(pu1_dst + 7 * dst_strd), out_15x16);
11458d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S}
11468d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
11478d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S/**
11488d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *******************************************************************************
11498d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
11508d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * ih264_intra_pred_luma_8x8_mode_diag_dr_ssse3
11518d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
11528d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @brief
11538d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  Perform Intra prediction for luma_8x8 mode:Diagonal_Down_Right
11548d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
11558d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @par Description:
11568d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  Perform Intra prediction for luma_8x8 mode:Diagonal_Down_Right ,described in sec 8.3.2.2.6
11578d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
11588d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] pu1_src
11598d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  UWORD8 pointer to the source
11608d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
11618d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[out] pu1_dst
11628d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  UWORD8 pointer to the destination
11638d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
11648d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] src_strd
11658d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  integer source stride
11668d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
11678d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] dst_strd
11688d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  integer destination stride
11698d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
11708d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] ngbr_avail
11718d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * availability of neighbouring pixels(Not used in this function)
11728d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
11738d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @returns
11748d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
11758d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @remarks
11768d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  None
11778d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
11788d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *******************************************************************************/
11798d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha Svoid ih264_intra_pred_luma_8x8_mode_diag_dr_ssse3(UWORD8 *pu1_src,
11808d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S                                                  UWORD8 *pu1_dst,
11818d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S                                                  WORD32 src_strd,
11828d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S                                                  WORD32 dst_strd,
11838d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S                                                  WORD32 ngbr_avail)
11848d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S{
11858d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    UWORD8 *pu1_left = NULL; /* Pointer to start of left predictors */
11868d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    UWORD8 *pu1_top = NULL; /* Pointer to start of top predictors */
11878d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    __m128i top_8x8, left_16x8;
11888d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    __m128i out_15x16;
11898d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    __m128i a0_8x16, a1_8x16, a2_8x16;
11908d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    __m128i temp1, temp2;
11918d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    __m128i res1_8x16, res2_8x16;
11928d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    __m128i zero = _mm_setzero_si128();
11938d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    __m128i const_val2_8x16 = _mm_set1_epi16(2);
11948d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    __m128i str_8x8;
11958d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
11968d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    UNUSED(src_strd);
11978d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    UNUSED(ngbr_avail);
11988d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
11998d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    pu1_left = pu1_src + BLK8x8SIZE - 1;
12008d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    pu1_top = pu1_src + BLK8x8SIZE + 1;
12018d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
12028d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    left_16x8 = _mm_loadu_si128((__m128i *)(pu1_left - 7));
12038d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
12048d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    temp1 = _mm_srli_si128(left_16x8, 1);
12058d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    temp2 = _mm_srli_si128(left_16x8, 2);
12068d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    a0_8x16 = _mm_unpacklo_epi8(left_16x8, zero);
12078d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    a1_8x16 = _mm_unpacklo_epi8(temp1, zero);
12088d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    a2_8x16 = _mm_unpacklo_epi8(temp2, zero);
12098d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
12108d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    a0_8x16 = _mm_add_epi16(a0_8x16, a2_8x16);
12118d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    a1_8x16 = _mm_add_epi16(a1_8x16, a1_8x16);
12128d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    a0_8x16 = _mm_add_epi16(a0_8x16, const_val2_8x16);
12138d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    a0_8x16 = _mm_add_epi16(a0_8x16, a1_8x16);
12148d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    res1_8x16 = _mm_srai_epi16(a0_8x16, 2);
12158d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
12168d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    top_8x8 = _mm_loadu_si128((__m128i *)(pu1_top - 1));
12178d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
12188d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    temp1 = _mm_srli_si128(top_8x8, 1);
12198d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    temp2 = _mm_srli_si128(top_8x8, 2);
12208d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    a0_8x16 = _mm_unpacklo_epi8(top_8x8, zero);
12218d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    a1_8x16 = _mm_unpacklo_epi8(temp1, zero);
12228d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    a2_8x16 = _mm_unpacklo_epi8(temp2, zero);
12238d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
12248d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    a0_8x16 = _mm_add_epi16(a0_8x16, a2_8x16);
12258d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    a1_8x16 = _mm_add_epi16(a1_8x16, a1_8x16);
12268d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    a0_8x16 = _mm_add_epi16(a0_8x16, const_val2_8x16);
12278d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    a0_8x16 = _mm_add_epi16(a0_8x16, a1_8x16);
12288d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    res2_8x16 = _mm_srai_epi16(a0_8x16, 2);
12298d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
12308d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    out_15x16 = _mm_packus_epi16(res1_8x16, res2_8x16);
12318d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
12328d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    str_8x8 = _mm_srli_si128(out_15x16, 7);
12338d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storel_epi64((__m128i *)(pu1_dst + 0 * dst_strd), str_8x8);
12348d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    str_8x8 = _mm_srli_si128(out_15x16, 6);
12358d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storel_epi64((__m128i *)(pu1_dst + 1 * dst_strd), str_8x8);
12368d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    str_8x8 = _mm_srli_si128(out_15x16, 5);
12378d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storel_epi64((__m128i *)(pu1_dst + 2 * dst_strd), str_8x8);
12388d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    str_8x8 = _mm_srli_si128(out_15x16, 4);
12398d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storel_epi64((__m128i *)(pu1_dst + 3 * dst_strd), str_8x8);
12408d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    str_8x8 = _mm_srli_si128(out_15x16, 3);
12418d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storel_epi64((__m128i *)(pu1_dst + 4 * dst_strd), str_8x8);
12428d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    str_8x8 = _mm_srli_si128(out_15x16, 2);
12438d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storel_epi64((__m128i *)(pu1_dst + 5 * dst_strd), str_8x8);
12448d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    str_8x8 = _mm_srli_si128(out_15x16, 1);
12458d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storel_epi64((__m128i *)(pu1_dst + 6 * dst_strd), str_8x8);
12468d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storel_epi64((__m128i *)(pu1_dst + 7 * dst_strd), out_15x16);
12478d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S}
12488d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
12498d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S/**
12508d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *******************************************************************************
12518d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
12528d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * ih264_intra_pred_luma_8x8_mode_vert_r_ssse3
12538d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
12548d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @brief
12558d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  Perform Intra prediction for luma_8x8 mode:Vertical_Right
12568d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
12578d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @par Description:
12588d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  Perform Intra prediction for luma_8x8 mode:Vertical_Right ,described in sec 8.3.2.2.7
12598d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
12608d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] pu1_src
12618d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  UWORD8 pointer to the source
12628d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
12638d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[out] pu1_dst
12648d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  UWORD8 pointer to the destination
12658d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
12668d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] src_strd
12678d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  integer source stride
12688d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
12698d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] dst_strd
12708d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  integer destination stride
12718d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
12728d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] ngbr_avail
12738d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * availability of neighbouring pixels(Not used in this function)
12748d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
12758d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @returns
12768d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
12778d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @remarks
12788d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  None
12798d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
12808d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *******************************************************************************/
12818d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha Svoid ih264_intra_pred_luma_8x8_mode_vert_r_ssse3(UWORD8 *pu1_src,
12828d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S                                                 UWORD8 *pu1_dst,
12838d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S                                                 WORD32 src_strd,
12848d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S                                                 WORD32 dst_strd,
12858d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S                                                 WORD32 ngbr_avail)
12868d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S{
12878d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    UWORD8 *pu1_left = NULL; /* Pointer to start of left predictors */
12888d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    UWORD8 *pu1_top = NULL; /* Pointer to start of top predictors */
12898d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    __m128i top_8x8, left_16x8;
12908d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    __m128i out1_16x16, out2_16x16;
12918d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    __m128i a0_8x16, a1_8x16, a2_8x16;
12928d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    __m128i temp1, temp2;
12938d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    __m128i res1_8x16, res2_8x16, res3_8x16;
12948d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    __m128i zero = _mm_setzero_si128();
12958d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    __m128i const_val2_8x16 = _mm_set1_epi16(2);
12968d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    __m128i str_8x8;
12978d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    __m128i mask = _mm_set1_epi32(0xFFFF);
12988d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
12998d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    UNUSED(src_strd);
13008d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    UNUSED(ngbr_avail);
13018d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
13028d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    pu1_left = pu1_src + BLK8x8SIZE - 1;
13038d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    pu1_top = pu1_src + BLK8x8SIZE + 1;
13048d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
13058d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    left_16x8 = _mm_loadu_si128((__m128i *)(pu1_left - 6));
13068d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
13078d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    temp1 = _mm_srli_si128(left_16x8, 1);
13088d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    temp2 = _mm_srli_si128(left_16x8, 2);
13098d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    a0_8x16 = _mm_unpacklo_epi8(left_16x8, zero);
13108d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    a1_8x16 = _mm_unpacklo_epi8(temp1, zero);
13118d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    a2_8x16 = _mm_unpacklo_epi8(temp2, zero);
13128d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
13138d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    a0_8x16 = _mm_add_epi16(a0_8x16, a2_8x16);
13148d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    a1_8x16 = _mm_add_epi16(a1_8x16, a1_8x16);
13158d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    a0_8x16 = _mm_add_epi16(a0_8x16, const_val2_8x16);
13168d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    a0_8x16 = _mm_add_epi16(a0_8x16, a1_8x16);
13178d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    res1_8x16 = _mm_srai_epi16(a0_8x16, 2);
13188d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
13198d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    top_8x8 = _mm_loadu_si128((__m128i *)(pu1_top - 1));
13208d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
13218d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    temp1 = _mm_srli_si128(top_8x8, 1);
13228d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    temp2 = _mm_srli_si128(top_8x8, 2);
13238d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    a0_8x16 = _mm_unpacklo_epi8(top_8x8, zero);
13248d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    a1_8x16 = _mm_unpacklo_epi8(temp1, zero);
13258d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    a2_8x16 = _mm_unpacklo_epi8(temp2, zero);
13268d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
13278d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    res3_8x16 = _mm_avg_epu16(a0_8x16, a1_8x16);
13288d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
13298d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    a0_8x16 = _mm_add_epi16(a0_8x16, a2_8x16);
13308d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    a1_8x16 = _mm_add_epi16(a1_8x16, a1_8x16);
13318d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    a0_8x16 = _mm_add_epi16(a0_8x16, const_val2_8x16);
13328d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    a0_8x16 = _mm_add_epi16(a0_8x16, a1_8x16);
13338d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    res2_8x16 = _mm_srai_epi16(a0_8x16, 2);
13348d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
13358d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    str_8x8 = _mm_packus_epi16(res3_8x16, zero);
13368d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storel_epi64((__m128i *)(pu1_dst + 0 * dst_strd), str_8x8);
13378d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
13388d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    temp1 = _mm_and_si128(res1_8x16, mask);
13398d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    temp1 = _mm_packs_epi32(temp1, temp1);
13408d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    out1_16x16 = _mm_packus_epi16(temp1, res2_8x16);
13418d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
13428d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    res1_8x16 = _mm_slli_si128(res1_8x16, 2);
13438d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    temp1 = _mm_and_si128(res1_8x16, mask);
13448d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    temp1 = _mm_packs_epi32(temp1, temp1);
13458d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    out2_16x16 = _mm_packus_epi16(temp1, res3_8x16);
13468d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
13478d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    str_8x8 = _mm_srli_si128(out1_16x16, 7);
13488d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storel_epi64((__m128i *)(pu1_dst + 1 * dst_strd), str_8x8);
13498d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
13508d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    str_8x8 = _mm_srli_si128(out2_16x16, 7);
13518d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storel_epi64((__m128i *)(pu1_dst + 2 * dst_strd), str_8x8);
13528d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
13538d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    str_8x8 = _mm_srli_si128(out1_16x16, 6);
13548d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storel_epi64((__m128i *)(pu1_dst + 3 * dst_strd), str_8x8);
13558d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
13568d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    str_8x8 = _mm_srli_si128(out2_16x16, 6);
13578d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storel_epi64((__m128i *)(pu1_dst + 4 * dst_strd), str_8x8);
13588d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
13598d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    str_8x8 = _mm_srli_si128(out1_16x16, 5);
13608d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storel_epi64((__m128i *)(pu1_dst + 5 * dst_strd), str_8x8);
13618d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
13628d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    str_8x8 = _mm_srli_si128(out2_16x16, 5);
13638d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storel_epi64((__m128i *)(pu1_dst + 6 * dst_strd), str_8x8);
13648d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
13658d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    str_8x8 = _mm_srli_si128(out1_16x16, 4);
13668d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storel_epi64((__m128i *)(pu1_dst + 7 * dst_strd), str_8x8);
13678d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S}
13688d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
/**
13708d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *******************************************************************************
13718d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
13728d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * ih264_intra_pred_luma_8x8_mode_horz_d_ssse3
13738d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
13748d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @brief
13758d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  Perform Intra prediction for luma_8x8 mode:Horizontal_Down
13768d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
13778d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @par Description:
13788d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  Perform Intra prediction for luma_8x8 mode:Horizontal_Down ,described in sec 8.3.2.2.8
13798d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
13808d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] pu1_src
13818d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  UWORD8 pointer to the source
13828d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
13838d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[out] pu1_dst
13848d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  UWORD8 pointer to the destination
13858d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
13868d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] src_strd
13878d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  integer source stride
13888d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
13898d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] dst_strd
13908d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  integer destination stride
13918d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
13928d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] ngbr_avail
13938d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * availability of neighbouring pixels(Not used in this function)
13948d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
13958d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @returns
13968d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
13978d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @remarks
13988d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  None
13998d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
14008d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *******************************************************************************/
14018d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha Svoid ih264_intra_pred_luma_8x8_mode_horz_d_ssse3(UWORD8 *pu1_src,
14028d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S                                                 UWORD8 *pu1_dst,
14038d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S                                                 WORD32 src_strd,
14048d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S                                                 WORD32 dst_strd,
14058d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S                                                 WORD32 ngbr_avail)
14068d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S{
14078d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    UWORD8 *pu1_left = NULL; /* Pointer to start of left predictors */
14088d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    __m128i pels_16x16;
14098d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    __m128i temp1, temp2, temp3, temp4;
14108d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    __m128i a0_8x16, a1_8x16, a2_8x16;
14118d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    __m128i zero = _mm_setzero_si128();
14128d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    __m128i const_val2_8x16 = _mm_set1_epi16(2);
14138d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    __m128i res1_8x16, res2_8x16;
14148d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    __m128i out1_16x16, out2_16x16;
14158d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    __m128i str_8x8;
14168d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    UNUSED(src_strd);
14178d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    UNUSED(ngbr_avail);
14188d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
14198d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    pu1_left = pu1_src + BLK8x8SIZE - 1;
14208d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
14218d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    pels_16x16 = _mm_loadu_si128((__m128i *)(pu1_left - 7));
14228d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
14238d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    temp1 = _mm_srli_si128(pels_16x16, 1);
14248d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    temp2 = _mm_srli_si128(pels_16x16, 2);
14258d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    a0_8x16 = _mm_unpacklo_epi8(pels_16x16, zero);
14268d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    a1_8x16 = _mm_unpacklo_epi8(temp1, zero);
14278d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    a2_8x16 = _mm_unpacklo_epi8(temp2, zero);
14288d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
14298d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    res1_8x16 = _mm_avg_epu16(a0_8x16, a1_8x16);
14308d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
14318d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    a0_8x16 = _mm_add_epi16(a0_8x16, a2_8x16);
14328d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    a1_8x16 = _mm_add_epi16(a1_8x16, a1_8x16);
14338d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    a0_8x16 = _mm_add_epi16(a0_8x16, const_val2_8x16);
14348d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    a0_8x16 = _mm_add_epi16(a0_8x16, a1_8x16);
14358d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    res2_8x16 = _mm_srai_epi16(a0_8x16, 2);
14368d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
14378d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    temp3 = _mm_unpacklo_epi16(res1_8x16, res2_8x16);
14388d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    temp4 = _mm_unpackhi_epi16(res1_8x16, res2_8x16);
14398d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    out2_16x16 = _mm_packus_epi16(temp3, temp4);
14408d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
14418d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    a0_8x16 = _mm_unpackhi_epi8(pels_16x16, zero);
14428d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    a1_8x16 = _mm_unpackhi_epi8(temp1, zero);
14438d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    a2_8x16 = _mm_unpackhi_epi8(temp2, zero);
14448d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
14458d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    a0_8x16 = _mm_add_epi16(a0_8x16, a2_8x16);
14468d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    a1_8x16 = _mm_add_epi16(a1_8x16, a1_8x16);
14478d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    a0_8x16 = _mm_add_epi16(a0_8x16, const_val2_8x16);
14488d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    a0_8x16 = _mm_add_epi16(a0_8x16, a1_8x16);
14498d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    res2_8x16 = _mm_srai_epi16(a0_8x16, 2);
14508d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
14518d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    out1_16x16 = _mm_packus_epi16(res2_8x16, zero);
14528d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    temp1 = _mm_srli_si128(out2_16x16, 8);
14538d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    out1_16x16 = _mm_unpacklo_epi64(temp1, out1_16x16);
14548d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
14558d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    str_8x8 = _mm_srli_si128(out1_16x16, 6);
14568d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storel_epi64((__m128i *)(pu1_dst + 0 * dst_strd), str_8x8);
14578d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    str_8x8 = _mm_srli_si128(out1_16x16, 4);
14588d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storel_epi64((__m128i *)(pu1_dst + 1 * dst_strd), str_8x8);
14598d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    str_8x8 = _mm_srli_si128(out1_16x16, 2);
14608d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storel_epi64((__m128i *)(pu1_dst + 2 * dst_strd), str_8x8);
14618d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storel_epi64((__m128i *)(pu1_dst + 3 * dst_strd), out1_16x16);
14628d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
14638d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    str_8x8 = _mm_srli_si128(out2_16x16, 6);
14648d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storel_epi64((__m128i *)(pu1_dst + 4 * dst_strd), str_8x8);
14658d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    str_8x8 = _mm_srli_si128(out2_16x16, 4);
14668d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storel_epi64((__m128i *)(pu1_dst + 5 * dst_strd), str_8x8);
14678d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    str_8x8 = _mm_srli_si128(out2_16x16, 2);
14688d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storel_epi64((__m128i *)(pu1_dst + 6 * dst_strd), str_8x8);
14698d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storel_epi64((__m128i *)(pu1_dst + 7 * dst_strd), out2_16x16);
14708d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S}
14718d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
14728d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S/**
14738d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *******************************************************************************
14748d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
14758d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * ih264_intra_pred_luma_8x8_mode_vert_l_ssse3
14768d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
14778d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @brief
14788d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  Perform Intra prediction for luma_8x8 mode:Vertical_Left
14798d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
14808d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @par Description:
14818d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  Perform Intra prediction for luma_8x8 mode:Vertical_Left ,described in sec 8.3.2.2.9
14828d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
14838d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] pu1_src
14848d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  UWORD8 pointer to the source
14858d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
14868d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[out] pu1_dst
14878d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  UWORD8 pointer to the destination
14888d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
14898d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] src_strd
14908d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  integer source stride
14918d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
14928d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] dst_strd
14938d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  integer destination stride
14948d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
14958d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] ngbr_avail
14968d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * availability of neighbouring pixels(Not used in this function)
14978d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
14988d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @returns
14998d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
15008d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @remarks
15018d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  None
15028d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
15038d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *******************************************************************************/
15048d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
15058d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha Svoid ih264_intra_pred_luma_8x8_mode_vert_l_ssse3(UWORD8 *pu1_src,
15068d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S                                                 UWORD8 *pu1_dst,
15078d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S                                                 WORD32 src_strd,
15088d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S                                                 WORD32 dst_strd,
15098d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S                                                 WORD32 ngbr_avail)
15108d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S{
15118d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    UWORD8 *pu1_top = NULL; /* Pointer to start of top predictors */
15128d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    __m128i top_16x16;
15138d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    __m128i temp1, temp2;
15148d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    __m128i a0_8x16, a1_8x16, a2_8x16;
15158d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    __m128i zero = _mm_setzero_si128();
15168d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    __m128i const_val2_8x16 = _mm_set1_epi16(2);
15178d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    __m128i res1_8x16, res2_8x16, res3_8x16, res4_8x16;
15188d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    __m128i out1_16x16, out2_16x16;
15198d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    UNUSED(src_strd);
15208d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    UNUSED(ngbr_avail);
15218d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    pu1_top = pu1_src + BLK8x8SIZE + 1;
15228d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
15238d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    top_16x16 = _mm_loadu_si128((__m128i *)(pu1_top));
15248d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    temp1 = _mm_srli_si128(top_16x16, 1);
15258d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    temp2 = _mm_srli_si128(top_16x16, 2);
15268d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    a0_8x16 = _mm_unpacklo_epi8(top_16x16, zero);
15278d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    a1_8x16 = _mm_unpacklo_epi8(temp1, zero);
15288d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    a2_8x16 = _mm_unpacklo_epi8(temp2, zero);
15298d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
15308d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    res1_8x16 = _mm_avg_epu16(a0_8x16, a1_8x16);
15318d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
15328d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    a0_8x16 = _mm_add_epi16(a0_8x16, a2_8x16);
15338d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    a1_8x16 = _mm_add_epi16(a1_8x16, a1_8x16);
15348d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    a0_8x16 = _mm_add_epi16(a0_8x16, const_val2_8x16);
15358d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    a0_8x16 = _mm_add_epi16(a0_8x16, a1_8x16);
15368d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    res2_8x16 = _mm_srai_epi16(a0_8x16, 2);
15378d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
15388d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    a0_8x16 = _mm_unpackhi_epi8(top_16x16, zero);
15398d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    a1_8x16 = _mm_unpackhi_epi8(temp1, zero);
15408d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    a2_8x16 = _mm_unpackhi_epi8(temp2, zero);
15418d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
15428d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    res3_8x16 = _mm_avg_epu16(a0_8x16, a1_8x16);
15438d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
15448d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    a0_8x16 = _mm_add_epi16(a0_8x16, a2_8x16);
15458d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    a1_8x16 = _mm_add_epi16(a1_8x16, a1_8x16);
15468d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    a0_8x16 = _mm_add_epi16(a0_8x16, const_val2_8x16);
15478d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    a0_8x16 = _mm_add_epi16(a0_8x16, a1_8x16);
15488d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    res4_8x16 = _mm_srai_epi16(a0_8x16, 2);
15498d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
15508d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    out1_16x16 = _mm_packus_epi16(res1_8x16, res3_8x16);
15518d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    out2_16x16 = _mm_packus_epi16(res2_8x16, res4_8x16);
15528d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
15538d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storel_epi64((__m128i *)(pu1_dst + 0 * dst_strd), out1_16x16);
15548d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storel_epi64((__m128i *)(pu1_dst + 1 * dst_strd), out2_16x16);
15558d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    out1_16x16 = _mm_srli_si128(out1_16x16, 1);
15568d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    out2_16x16 = _mm_srli_si128(out2_16x16, 1);
15578d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storel_epi64((__m128i *)(pu1_dst + 2 * dst_strd), out1_16x16);
15588d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storel_epi64((__m128i *)(pu1_dst + 3 * dst_strd), out2_16x16);
15598d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    out1_16x16 = _mm_srli_si128(out1_16x16, 1);
15608d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    out2_16x16 = _mm_srli_si128(out2_16x16, 1);
15618d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storel_epi64((__m128i *)(pu1_dst + 4 * dst_strd), out1_16x16);
15628d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storel_epi64((__m128i *)(pu1_dst + 5 * dst_strd), out2_16x16);
15638d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    out1_16x16 = _mm_srli_si128(out1_16x16, 1);
15648d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    out2_16x16 = _mm_srli_si128(out2_16x16, 1);
15658d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storel_epi64((__m128i *)(pu1_dst + 6 * dst_strd), out1_16x16);
15668d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storel_epi64((__m128i *)(pu1_dst + 7 * dst_strd), out2_16x16);
15678d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S}
15688d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
15698d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S/**
15708d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *******************************************************************************
15718d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
15728d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * ih264_intra_pred_luma_8x8_mode_horz_u_ssse3
15738d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
15748d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @brief
15758d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  Perform Intra prediction for luma_8x8 mode:Horizontal_Up
15768d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
15778d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @par Description:
15788d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  Perform Intra prediction for luma_8x8 mode:Horizontal_Up ,described in sec 8.3.2.2.10
15798d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
15808d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] pu1_src
15818d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  UWORD8 pointer to the source
15828d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
15838d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[out] pu1_dst
15848d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  UWORD8 pointer to the destination
15858d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
15868d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] src_strd
15878d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  integer source stride
15888d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
15898d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] dst_strd
15908d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  integer destination stride
15918d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
15928d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] ngbr_avail
15938d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * availability of neighbouring pixels(Not used in this function)
15948d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
15958d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @returns
15968d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
15978d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @remarks
15988d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  None
15998d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
16008d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *******************************************************************************/
16018d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha Svoid ih264_intra_pred_luma_8x8_mode_horz_u_ssse3(UWORD8 *pu1_src,
16028d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S                                                 UWORD8 *pu1_dst,
16038d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S                                                 WORD32 src_strd,
16048d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S                                                 WORD32 dst_strd,
16058d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S                                                 WORD32 ngbr_avail)
16068d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S{
16078d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    UWORD8 *pu1_left = NULL; /* Pointer to start of left predictors */
16088d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    __m128i left_16x16;
16098d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    __m128i temp1, temp2;
16108d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    __m128i a0_8x16, a1_8x16, a2_8x16;
16118d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    __m128i zero = _mm_setzero_si128();
16128d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    __m128i const_val2_8x16 = _mm_set1_epi16(2);
16138d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    __m128i res1_8x16, res2_8x16;
16148d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    __m128i out1_16x16;
16158d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    __m128i str_8x8;
16168d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    __m128i shuffle_16x16;
16178d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    UNUSED(src_strd);
16188d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    UNUSED(ngbr_avail);
16198d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
16208d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    pu1_left = pu1_src + BLK8x8SIZE - 1;
16218d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    shuffle_16x16 = _mm_set_epi8(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
16228d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S                                 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E,
16238d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S                                 0x0F);
16248d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
16258d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    left_16x16 = _mm_loadu_si128((__m128i *)(pu1_left - 7));
16268d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    temp1 = _mm_srli_si128(left_16x16, 1);
16278d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    a0_8x16 = _mm_unpacklo_epi8(left_16x16, zero);
16288d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    a0_8x16 = _mm_slli_si128(a0_8x16, 2);
16298d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    a1_8x16 = _mm_unpacklo_epi8(left_16x16, zero);
16308d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    a0_8x16 = _mm_shufflelo_epi16(a0_8x16, 0xE5);
16318d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    a2_8x16 = _mm_unpacklo_epi8(temp1, zero);
16328d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
16338d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    res1_8x16 = _mm_avg_epu16(a0_8x16, a1_8x16);
16348d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
16358d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    a0_8x16 = _mm_add_epi16(a0_8x16, a2_8x16);
16368d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    a1_8x16 = _mm_add_epi16(a1_8x16, a1_8x16);
16378d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    a0_8x16 = _mm_add_epi16(a0_8x16, const_val2_8x16);
16388d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    a0_8x16 = _mm_add_epi16(a0_8x16, a1_8x16);
16398d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    res2_8x16 = _mm_srai_epi16(a0_8x16, 2);
16408d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
16418d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    temp1 = _mm_unpacklo_epi16(res1_8x16, res2_8x16);
16428d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    temp2 = _mm_unpackhi_epi16(res1_8x16, res2_8x16);
16438d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    out1_16x16 = _mm_packus_epi16(temp1, temp2);
16448d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    out1_16x16 = _mm_shuffle_epi8(out1_16x16, shuffle_16x16);
16458d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
16468d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    str_8x8 = _mm_srli_si128(out1_16x16, 1);
16478d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storel_epi64((__m128i *)(pu1_dst + 0 * dst_strd), str_8x8);
16488d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    str_8x8 = _mm_srli_si128(out1_16x16, 3);
16498d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storel_epi64((__m128i *)(pu1_dst + 1 * dst_strd), str_8x8);
16508d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    str_8x8 = _mm_srli_si128(out1_16x16, 5);
16518d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storel_epi64((__m128i *)(pu1_dst + 2 * dst_strd), str_8x8);
16528d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    str_8x8 = _mm_srli_si128(out1_16x16, 7);
16538d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storel_epi64((__m128i *)(pu1_dst + 3 * dst_strd), str_8x8);
16548d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    temp1 = _mm_set1_epi8(pu1_left[-7]);
16558d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    str_8x8 = _mm_unpacklo_epi64(str_8x8, temp1);
16568d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    str_8x8 = _mm_srli_si128(str_8x8, 2);
16578d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storel_epi64((__m128i *)(pu1_dst + 4 * dst_strd), str_8x8);
16588d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    str_8x8 = _mm_srli_si128(str_8x8, 2);
16598d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storel_epi64((__m128i *)(pu1_dst + 5 * dst_strd), str_8x8);
16608d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    str_8x8 = _mm_srli_si128(str_8x8, 2);
16618d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storel_epi64((__m128i *)(pu1_dst + 6 * dst_strd), str_8x8);
16628d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    str_8x8 = _mm_srli_si128(str_8x8, 2);
16638d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storel_epi64((__m128i *)(pu1_dst + 7 * dst_strd), str_8x8);
16648d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
16658d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S}
16668d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
16678d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
16688d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S/*******************    16x16 Modes    *******************/
16698d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
16708d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S/**
16718d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *******************************************************************************
16728d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
16738d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *ih264_intra_pred_luma_16x16_mode_vert_ssse3
16748d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
16758d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @brief
16768d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  Perform Intra prediction for luma_16x16 mode:Vertical
16778d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
16788d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @par Description:
16798d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  Perform Intra prediction for luma_16x16 mode:Vertical, described in sec 8.3.3.1
16808d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
16818d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] pu1_src
16828d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  UWORD8 pointer to the source
16838d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
16848d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[out] pu1_dst
16858d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  UWORD8 pointer to the destination
16868d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
16878d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] src_strd
16888d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  integer source stride
16898d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
16908d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] dst_strd
16918d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  integer destination stride
16928d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
16938d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] ngbr_avail
16948d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  availability of neighbouring pixels (Not used in this function)
16958d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
16968d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @returns
16978d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
16988d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @remarks
16998d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  None
17008d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
17018d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *******************************************************************************/
17028d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha Svoid ih264_intra_pred_luma_16x16_mode_vert_ssse3(UWORD8 *pu1_src,
17038d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S                                                 UWORD8 *pu1_dst,
17048d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S                                                 WORD32 src_strd,
17058d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S                                                 WORD32 dst_strd,
17068d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S                                                 WORD32 ngbr_avail)
17078d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S{
17088d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    UWORD8 *pu1_top;
17098d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    WORD32 dst_strd2, dst_strd3, dst_strd4;
17108d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
17118d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    __m128i top_16x8b;
17128d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
17138d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    UNUSED(src_strd);
17148d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    UNUSED(ngbr_avail);
17158d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
17168d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    pu1_top = pu1_src + MB_SIZE + 1;
17178d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
17188d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    dst_strd2 = dst_strd << 1;
17198d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    dst_strd4 = dst_strd << 2;
17208d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
17218d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    top_16x8b = _mm_loadu_si128((__m128i *)pu1_top);
17228d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
17238d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    dst_strd3 = dst_strd + dst_strd2;
17248d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
17258d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storeu_si128((__m128i *)pu1_dst, top_16x8b);
17268d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), top_16x8b);
17278d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd2), top_16x8b);
17288d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd3), top_16x8b);
17298d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    pu1_dst += dst_strd4;
17308d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
17318d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storeu_si128((__m128i *)pu1_dst, top_16x8b);
17328d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), top_16x8b);
17338d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd2), top_16x8b);
17348d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd3), top_16x8b);
17358d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    pu1_dst += dst_strd4;
17368d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
17378d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storeu_si128((__m128i *)pu1_dst, top_16x8b);
17388d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), top_16x8b);
17398d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd2), top_16x8b);
17408d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd3), top_16x8b);
17418d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    pu1_dst += dst_strd4;
17428d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
17438d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storeu_si128((__m128i *)pu1_dst, top_16x8b);
17448d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), top_16x8b);
17458d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd2), top_16x8b);
17468d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd3), top_16x8b);
17478d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S}
17488d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
17498d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S/**
17508d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *******************************************************************************
17518d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
17528d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *ih264_intra_pred_luma_16x16_mode_horz_ssse3
17538d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
17548d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @brief
17558d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  Perform Intra prediction for luma_16x16 mode:Horizontal
17568d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
17578d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @par Description:
17588d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  Perform Intra prediction for luma_16x16 mode:Horizontal, described in sec 8.3.3.2
17598d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
17608d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] pu1_src
17618d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  UWORD8 pointer to the source
17628d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
17638d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[out] pu1_dst
17648d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  UWORD8 pointer to the destination
17658d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
17668d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] src_strd
17678d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  integer source stride
17688d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
17698d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] dst_strd
17708d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  integer destination stride
17718d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
17728d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] ngbr_avail
17738d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * availability of neighbouring pixels(Not used in this function)
17748d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
17758d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @returns
17768d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
17778d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @remarks
17788d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  None
17798d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
17808d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *******************************************************************************/
17818d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha Svoid ih264_intra_pred_luma_16x16_mode_horz_ssse3(UWORD8 *pu1_src,
17828d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S                                                 UWORD8 *pu1_dst,
17838d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S                                                 WORD32 src_strd,
17848d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S                                                 WORD32 dst_strd,
17858d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S                                                 WORD32 ngbr_avail)
17868d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S{
17878d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    UWORD8 *pu1_left;
17888d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    WORD32 dst_strd2, dst_strd3, dst_strd4;
17898d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
17908d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    __m128i row1_16x8b, row2_16x8b, row3_16x8b, row4_16x8b;
17918d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
17928d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    UNUSED(src_strd);
17938d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    UNUSED(ngbr_avail);
17948d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
17958d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    pu1_left = pu1_src + MB_SIZE - 1;
17968d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
17978d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    dst_strd4 = dst_strd << 2;
17988d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
17998d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    dst_strd2 = dst_strd << 1;
18008d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    dst_strd3 = dst_strd4 - dst_strd;
18018d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
1802796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    row1_16x8b = _mm_set1_epi8(*(pu1_left));
1803796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    row2_16x8b = _mm_set1_epi8(*(pu1_left - 1));
1804796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    row3_16x8b = _mm_set1_epi8(*(pu1_left - 2));
1805796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    row4_16x8b = _mm_set1_epi8(*(pu1_left - 3));
18068d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
18078d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storeu_si128((__m128i *)pu1_dst, row1_16x8b);
18088d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), row2_16x8b);
18098d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd2), row3_16x8b);
18108d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd3), row4_16x8b);
18118d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
18128d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    pu1_dst += dst_strd4;
1813796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    row1_16x8b = _mm_set1_epi8(*(pu1_left - 4));
1814796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    row2_16x8b = _mm_set1_epi8(*(pu1_left - 5));
1815796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    row3_16x8b = _mm_set1_epi8(*(pu1_left - 6));
1816796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    row4_16x8b = _mm_set1_epi8(*(pu1_left - 7));
18178d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
18188d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storeu_si128((__m128i *)pu1_dst, row1_16x8b);
18198d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), row2_16x8b);
18208d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd2), row3_16x8b);
18218d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd3), row4_16x8b);
18228d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
18238d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    pu1_dst += dst_strd4;
1824796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    row1_16x8b = _mm_set1_epi8(*(pu1_left - 8));
1825796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    row2_16x8b = _mm_set1_epi8(*(pu1_left - 9));
1826796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    row3_16x8b = _mm_set1_epi8(*(pu1_left - 10));
1827796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    row4_16x8b = _mm_set1_epi8(*(pu1_left - 11));
18288d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
18298d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storeu_si128((__m128i *)pu1_dst, row1_16x8b);
18308d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), row2_16x8b);
18318d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd2), row3_16x8b);
18328d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd3), row4_16x8b);
18338d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
18348d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    pu1_dst += dst_strd4;
1835796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    row1_16x8b = _mm_set1_epi8(*(pu1_left - 12));
1836796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    row2_16x8b = _mm_set1_epi8(*(pu1_left - 13));
1837796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    row3_16x8b = _mm_set1_epi8(*(pu1_left - 14));
1838796e3c8de825c078c77b9fd83abca8c7f79d1127Naveen Kumar Ponnusamy    row4_16x8b = _mm_set1_epi8(*(pu1_left - 15));
18398d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
18408d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storeu_si128((__m128i *)pu1_dst, row1_16x8b);
18418d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), row2_16x8b);
18428d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd2), row3_16x8b);
18438d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd3), row4_16x8b);
18448d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S}
18458d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
18468d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S/**
18478d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *******************************************************************************
18488d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
18498d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *ih264_intra_pred_luma_16x16_mode_dc_ssse3
18508d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
18518d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @brief
18528d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  Perform Intra prediction for  luma_16x16 mode:DC
18538d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
18548d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @par Description:
18558d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  Perform Intra prediction for  luma_16x16 mode:DC, described in sec 8.3.3.3
18568d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
18578d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] pu1_src
18588d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  UWORD8 pointer to the source
18598d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
18608d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[out] pu1_dst
18618d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  UWORD8 pointer to the destination
18628d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
18638d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] src_strd
18648d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  integer source stride
18658d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
18668d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] dst_strd
18678d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  integer destination stride
18688d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
18698d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S ** @param[in] ngbr_avail
18708d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  availability of neighbouring pixels
18718d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
18728d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @returns
18738d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
18748d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @remarks
18758d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  None
18768d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
18778d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *******************************************************************************/
18788d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha Svoid ih264_intra_pred_luma_16x16_mode_dc_ssse3(UWORD8 *pu1_src,
18798d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S                                               UWORD8 *pu1_dst,
18808d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S                                               WORD32 src_strd,
18818d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S                                               WORD32 dst_strd,
18828d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S                                               WORD32 ngbr_avail)
18838d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S{
18848d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    WORD8 u1_useleft, u1_usetop;
18858d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    WORD32 dc_val;
18868d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
18878d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    WORD32 dst_strd2, dst_strd3, dst_strd4;
18888d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
18898d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    __m128i dc_val_16x8b;
18908d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
18918d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    UNUSED(src_strd);
18928d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
18938d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    u1_useleft = BOOLEAN(ngbr_avail & LEFT_MB_AVAILABLE_MASK);
18948d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    u1_usetop = BOOLEAN(ngbr_avail & TOP_MB_AVAILABLE_MASK);
18958d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
18968d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    if(u1_useleft || u1_usetop)
18978d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    {
18988d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        WORD32 shft;
18998d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        __m128i val_16x8b, zero_16x8b, sum_8x16b;
19008d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
19018d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        dc_val = 0;
19028d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        shft = 3;
19038d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
19048d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        zero_16x8b = _mm_setzero_si128();
19058d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
19068d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        if(u1_useleft)
19078d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        {
19088d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S            UWORD8 *pu1_left;
19098d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
19108d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S            pu1_left = pu1_src + MB_SIZE - 1;
19118d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
19128d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S            val_16x8b = _mm_loadu_si128((__m128i *)(pu1_left - 15));
19138d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S            sum_8x16b = _mm_sad_epu8(zero_16x8b, val_16x8b);
19148d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
19158d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S            shft++;
19168d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S            dc_val += 8;
19178d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S            dc_val += _mm_extract_epi16(sum_8x16b, 0);
19188d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S            dc_val += _mm_extract_epi16(sum_8x16b, 4);
19198d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        }
19208d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        if(u1_usetop)
19218d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        {
19228d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S            UWORD8 *pu1_top;
19238d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
19248d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S            pu1_top = pu1_src + MB_SIZE + 1;
19258d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
19268d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S            val_16x8b = _mm_loadu_si128((__m128i *)pu1_top);
19278d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S            sum_8x16b = _mm_sad_epu8(zero_16x8b, val_16x8b);
19288d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
19298d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S            shft++;
19308d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S            dc_val += 8;
19318d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S            dc_val += _mm_extract_epi16(sum_8x16b, 0);
19328d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S            dc_val += _mm_extract_epi16(sum_8x16b, 4);
19338d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        }
19348d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        dc_val = dc_val >> shft;
19358d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    }
19368d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    else
19378d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        dc_val = 128;
19388d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
19398d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    dc_val_16x8b =  _mm_set1_epi8(dc_val);
19408d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
19418d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    dst_strd2 = dst_strd << 1;
19428d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    dst_strd4 = dst_strd << 2;
19438d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    dst_strd3 = dst_strd + dst_strd2;
19448d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
19458d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storeu_si128((__m128i *)pu1_dst, dc_val_16x8b);
19468d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), dc_val_16x8b);
19478d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd2), dc_val_16x8b);
19488d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd3), dc_val_16x8b);
19498d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    pu1_dst += dst_strd4;
19508d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
19518d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storeu_si128((__m128i *)pu1_dst, dc_val_16x8b);
19528d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), dc_val_16x8b);
19538d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd2), dc_val_16x8b);
19548d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd3), dc_val_16x8b);
19558d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    pu1_dst += dst_strd4;
19568d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
19578d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storeu_si128((__m128i *)pu1_dst, dc_val_16x8b);
19588d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), dc_val_16x8b);
19598d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd2), dc_val_16x8b);
19608d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd3), dc_val_16x8b);
19618d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    pu1_dst += dst_strd4;
19628d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
19638d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storeu_si128((__m128i *)pu1_dst, dc_val_16x8b);
19648d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), dc_val_16x8b);
19658d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd2), dc_val_16x8b);
19668d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd3), dc_val_16x8b);
19678d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S}
19688d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
19698d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S/**
19708d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *******************************************************************************
19718d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
19728d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *ih264_intra_pred_luma_16x16_mode_plane_ssse3
19738d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
19748d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @brief
19758d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  Perform Intra prediction for  luma_16x16 mode:PLANE
19768d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
19778d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @par Description:
19788d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  Perform Intra prediction for  luma_16x16 mode:PLANE, described in sec 8.3.3.4
19798d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
19808d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] pu1_src
19818d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  UWORD8 pointer to the source
19828d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
19838d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[out] pu1_dst
19848d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  UWORD8 pointer to the destination
19858d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
19868d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] src_strd
19878d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  integer source stride
19888d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
19898d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] dst_strd
19908d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  integer destination stride
19918d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
19928d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @param[in] ngbr_avail
 * availability of neighbouring pixels (not used in this function)
19948d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
19958d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @returns
19968d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
19978d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S * @remarks
19988d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *  None
19998d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *
20008d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S *******************************************************************************/
20018d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha Svoid ih264_intra_pred_luma_16x16_mode_plane_ssse3(UWORD8 *pu1_src,
20028d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S                                                  UWORD8 *pu1_dst,
20038d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S                                                  WORD32 src_strd,
20048d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S                                                  WORD32 dst_strd,
20058d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S                                                  WORD32 ngbr_avail)
20068d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S{
20078d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    UWORD8 *pu1_left, *pu1_top;
20088d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    WORD32 a, b, c;
20098d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
20108d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    __m128i rev_8x16b, mul_8x16b, zero_16x8b;
20118d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
20128d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    UNUSED(src_strd);
20138d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    UNUSED(ngbr_avail);
20148d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
20158d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    pu1_top = pu1_src + MB_SIZE + 1;
20168d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    pu1_left = pu1_src + MB_SIZE - 1;
20178d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
20188d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    rev_8x16b = _mm_setr_epi16(0x0f0e, 0x0d0c, 0x0b0a, 0x0908, 0x0706, 0x0504, 0x0302, 0x0100);
20198d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    //used to reverse the order of 16-bit values in a vector
20208d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
20218d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    mul_8x16b = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
20228d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    zero_16x8b = _mm_setzero_si128();
20238d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
20248d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    //calculating a, b and c
20258d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    {
20268d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        WORD32 h, v;
20278d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
20288d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        __m128i h_val1_16x8b, h_val2_16x8b;
20298d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        __m128i h_val1_8x16b, h_val2_8x16b, h_val_4x32b;
20308d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        __m128i v_val1_16x8b, v_val2_16x8b;
20318d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        __m128i v_val1_8x16b, v_val2_8x16b, v_val_4x32b;
20328d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        __m128i hv_val_4x32b;
20338d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
20348d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        a = (pu1_top[15] + pu1_left[-15]) << 4;
20358d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
20368d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        h_val1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_top + 8));
20378d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        h_val2_16x8b = _mm_loadl_epi64((__m128i *)(pu1_top - 1));
20388d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        v_val1_16x8b = _mm_loadl_epi64((__m128i *)(pu1_left - 15));
20398d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        v_val2_16x8b = _mm_loadl_epi64((__m128i *)(pu1_left - 6));
20408d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
20418d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        h_val1_8x16b = _mm_unpacklo_epi8(h_val1_16x8b, zero_16x8b);
20428d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        h_val2_8x16b = _mm_unpacklo_epi8(h_val2_16x8b, zero_16x8b);
20438d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        v_val1_8x16b = _mm_unpacklo_epi8(v_val1_16x8b, zero_16x8b);
20448d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        v_val2_8x16b = _mm_unpacklo_epi8(v_val2_16x8b, zero_16x8b);
20458d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
20468d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        h_val2_8x16b = _mm_shuffle_epi8(h_val2_8x16b, rev_8x16b);
20478d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        v_val1_8x16b = _mm_shuffle_epi8(v_val1_8x16b, rev_8x16b);
20488d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
20498d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        h_val1_8x16b = _mm_sub_epi16(h_val1_8x16b, h_val2_8x16b);
20508d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        v_val1_8x16b = _mm_sub_epi16(v_val1_8x16b, v_val2_8x16b);
20518d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
20528d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        h_val_4x32b = _mm_madd_epi16(mul_8x16b, h_val1_8x16b);
20538d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        v_val_4x32b = _mm_madd_epi16(mul_8x16b, v_val1_8x16b);
20548d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
20558d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        hv_val_4x32b = _mm_hadd_epi32(h_val_4x32b, v_val_4x32b);
20568d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        hv_val_4x32b = _mm_hadd_epi32(hv_val_4x32b, hv_val_4x32b);
20578d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
20588d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        h = _mm_extract_epi16(hv_val_4x32b, 0);
20598d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        v = _mm_extract_epi16(hv_val_4x32b, 2);
20608d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        h = (h << 16) >> 16;
20618d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        v = (v << 16) >> 16;
20628d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
20638d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        b = ((h << 2) + h + 32) >> 6;
20648d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        c = ((v << 2) + v + 32) >> 6;
20658d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    }
20668d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
20678d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    //using a, b and c to compute the fitted plane values
20688d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    {
20698d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        __m128i const_8x16b, b_8x16b, c_8x16b, c2_8x16b;
20708d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        __m128i res1_l_8x16b, res1_h_8x16b;
20718d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        __m128i res2_l_8x16b, res2_h_8x16b;
20728d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        __m128i res1_sh_l_8x16b, res1_sh_h_8x16b, res1_16x8b;
20738d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        __m128i res2_sh_l_8x16b, res2_sh_h_8x16b, res2_16x8b;
20748d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
20758d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        b_8x16b = _mm_set1_epi16(b);
20768d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        c_8x16b = _mm_set1_epi16(c);
20778d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        c2_8x16b = _mm_set1_epi16(c << 1);
20788d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        const_8x16b = _mm_set1_epi16(a - c*7 + 16);
20798d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
20808d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        res1_h_8x16b = _mm_mullo_epi16(mul_8x16b, b_8x16b);
20818d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        //contains {b*1, b*2, b*3,... b*8}
20828d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
20838d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        res1_l_8x16b = _mm_shuffle_epi8(res1_h_8x16b, rev_8x16b);
20848d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        res1_l_8x16b = _mm_srli_si128(res1_l_8x16b, 2);
20858d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        res1_l_8x16b = _mm_sub_epi16(zero_16x8b, res1_l_8x16b);
20868d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        //contains {-b*7, -b*6,... -b*1, b*0}
20878d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
20888d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        // rows 1, 2
20898d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        res1_h_8x16b = _mm_add_epi16(res1_h_8x16b, const_8x16b);
20908d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        res1_l_8x16b = _mm_add_epi16(res1_l_8x16b, const_8x16b);
20918d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        res2_h_8x16b = _mm_add_epi16(res1_h_8x16b, c_8x16b);
20928d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        res2_l_8x16b = _mm_add_epi16(res1_l_8x16b, c_8x16b);
20938d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
20948d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        res1_sh_h_8x16b = _mm_srai_epi16(res1_h_8x16b, 5);
20958d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        res1_sh_l_8x16b = _mm_srai_epi16(res1_l_8x16b, 5);
20968d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        res2_sh_h_8x16b = _mm_srai_epi16(res2_h_8x16b, 5);
20978d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        res2_sh_l_8x16b = _mm_srai_epi16(res2_l_8x16b, 5);
20988d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
20998d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        res1_16x8b = _mm_packus_epi16(res1_sh_l_8x16b, res1_sh_h_8x16b);
21008d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        res2_16x8b = _mm_packus_epi16(res2_sh_l_8x16b, res2_sh_h_8x16b);
21018d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
21028d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        _mm_storeu_si128((__m128i *)pu1_dst, res1_16x8b);
21038d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), res2_16x8b);
21048d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
21058d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        // rows 3, 4
21068d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        res1_h_8x16b = _mm_add_epi16(res1_h_8x16b, c2_8x16b);
21078d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        res1_l_8x16b = _mm_add_epi16(res1_l_8x16b, c2_8x16b);
21088d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        res2_h_8x16b = _mm_add_epi16(res2_h_8x16b, c2_8x16b);
21098d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        res2_l_8x16b = _mm_add_epi16(res2_l_8x16b, c2_8x16b);
21108d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
21118d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        res1_sh_h_8x16b = _mm_srai_epi16(res1_h_8x16b, 5);
21128d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        res1_sh_l_8x16b = _mm_srai_epi16(res1_l_8x16b, 5);
21138d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        res2_sh_h_8x16b = _mm_srai_epi16(res2_h_8x16b, 5);
21148d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        res2_sh_l_8x16b = _mm_srai_epi16(res2_l_8x16b, 5);
21158d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
21168d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        pu1_dst += dst_strd << 1;
21178d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
21188d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        res1_16x8b = _mm_packus_epi16(res1_sh_l_8x16b, res1_sh_h_8x16b);
21198d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        res2_16x8b = _mm_packus_epi16(res2_sh_l_8x16b, res2_sh_h_8x16b);
21208d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
21218d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        _mm_storeu_si128((__m128i *)pu1_dst, res1_16x8b);
21228d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), res2_16x8b);
21238d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
21248d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        // rows 5, 6
21258d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        res1_h_8x16b = _mm_add_epi16(res1_h_8x16b, c2_8x16b);
21268d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        res1_l_8x16b = _mm_add_epi16(res1_l_8x16b, c2_8x16b);
21278d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        res2_h_8x16b = _mm_add_epi16(res2_h_8x16b, c2_8x16b);
21288d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        res2_l_8x16b = _mm_add_epi16(res2_l_8x16b, c2_8x16b);
21298d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
21308d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        res1_sh_h_8x16b = _mm_srai_epi16(res1_h_8x16b, 5);
21318d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        res1_sh_l_8x16b = _mm_srai_epi16(res1_l_8x16b, 5);
21328d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        res2_sh_h_8x16b = _mm_srai_epi16(res2_h_8x16b, 5);
21338d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        res2_sh_l_8x16b = _mm_srai_epi16(res2_l_8x16b, 5);
21348d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
21358d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        pu1_dst += dst_strd << 1;
21368d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
21378d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        res1_16x8b = _mm_packus_epi16(res1_sh_l_8x16b, res1_sh_h_8x16b);
21388d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        res2_16x8b = _mm_packus_epi16(res2_sh_l_8x16b, res2_sh_h_8x16b);
21398d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
21408d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        _mm_storeu_si128((__m128i *)pu1_dst, res1_16x8b);
21418d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), res2_16x8b);
21428d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
21438d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        // rows 7, 8
21448d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        res1_h_8x16b = _mm_add_epi16(res1_h_8x16b, c2_8x16b);
21458d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        res1_l_8x16b = _mm_add_epi16(res1_l_8x16b, c2_8x16b);
21468d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        res2_h_8x16b = _mm_add_epi16(res2_h_8x16b, c2_8x16b);
21478d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        res2_l_8x16b = _mm_add_epi16(res2_l_8x16b, c2_8x16b);
21488d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
21498d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        res1_sh_h_8x16b = _mm_srai_epi16(res1_h_8x16b, 5);
21508d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        res1_sh_l_8x16b = _mm_srai_epi16(res1_l_8x16b, 5);
21518d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        res2_sh_h_8x16b = _mm_srai_epi16(res2_h_8x16b, 5);
21528d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        res2_sh_l_8x16b = _mm_srai_epi16(res2_l_8x16b, 5);
21538d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
21548d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        pu1_dst += dst_strd << 1;
21558d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
21568d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        res1_16x8b = _mm_packus_epi16(res1_sh_l_8x16b, res1_sh_h_8x16b);
21578d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        res2_16x8b = _mm_packus_epi16(res2_sh_l_8x16b, res2_sh_h_8x16b);
21588d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
21598d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        _mm_storeu_si128((__m128i *)pu1_dst, res1_16x8b);
21608d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), res2_16x8b);
21618d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
21628d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        // rows 9, 10
21638d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        res1_h_8x16b = _mm_add_epi16(res1_h_8x16b, c2_8x16b);
21648d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        res1_l_8x16b = _mm_add_epi16(res1_l_8x16b, c2_8x16b);
21658d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        res2_h_8x16b = _mm_add_epi16(res2_h_8x16b, c2_8x16b);
21668d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        res2_l_8x16b = _mm_add_epi16(res2_l_8x16b, c2_8x16b);
21678d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
21688d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        res1_sh_h_8x16b = _mm_srai_epi16(res1_h_8x16b, 5);
21698d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        res1_sh_l_8x16b = _mm_srai_epi16(res1_l_8x16b, 5);
21708d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        res2_sh_h_8x16b = _mm_srai_epi16(res2_h_8x16b, 5);
21718d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        res2_sh_l_8x16b = _mm_srai_epi16(res2_l_8x16b, 5);
21728d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
21738d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        pu1_dst += dst_strd << 1;
21748d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
21758d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        res1_16x8b = _mm_packus_epi16(res1_sh_l_8x16b, res1_sh_h_8x16b);
21768d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        res2_16x8b = _mm_packus_epi16(res2_sh_l_8x16b, res2_sh_h_8x16b);
21778d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
21788d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        _mm_storeu_si128((__m128i *)pu1_dst, res1_16x8b);
21798d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), res2_16x8b);
21808d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
21818d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        // rows 11, 12
21828d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        res1_h_8x16b = _mm_add_epi16(res1_h_8x16b, c2_8x16b);
21838d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        res1_l_8x16b = _mm_add_epi16(res1_l_8x16b, c2_8x16b);
21848d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        res2_h_8x16b = _mm_add_epi16(res2_h_8x16b, c2_8x16b);
21858d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        res2_l_8x16b = _mm_add_epi16(res2_l_8x16b, c2_8x16b);
21868d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
21878d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        res1_sh_h_8x16b = _mm_srai_epi16(res1_h_8x16b, 5);
21888d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        res1_sh_l_8x16b = _mm_srai_epi16(res1_l_8x16b, 5);
21898d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        res2_sh_h_8x16b = _mm_srai_epi16(res2_h_8x16b, 5);
21908d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        res2_sh_l_8x16b = _mm_srai_epi16(res2_l_8x16b, 5);
21918d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
21928d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        pu1_dst += dst_strd << 1;
21938d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
21948d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        res1_16x8b = _mm_packus_epi16(res1_sh_l_8x16b, res1_sh_h_8x16b);
21958d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        res2_16x8b = _mm_packus_epi16(res2_sh_l_8x16b, res2_sh_h_8x16b);
21968d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
21978d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        _mm_storeu_si128((__m128i *)pu1_dst, res1_16x8b);
21988d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), res2_16x8b);
21998d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
22008d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        // rows 13, 14
22018d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        res1_h_8x16b = _mm_add_epi16(res1_h_8x16b, c2_8x16b);
22028d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        res1_l_8x16b = _mm_add_epi16(res1_l_8x16b, c2_8x16b);
22038d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        res2_h_8x16b = _mm_add_epi16(res2_h_8x16b, c2_8x16b);
22048d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        res2_l_8x16b = _mm_add_epi16(res2_l_8x16b, c2_8x16b);
22058d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
22068d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        res1_sh_h_8x16b = _mm_srai_epi16(res1_h_8x16b, 5);
22078d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        res1_sh_l_8x16b = _mm_srai_epi16(res1_l_8x16b, 5);
22088d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        res2_sh_h_8x16b = _mm_srai_epi16(res2_h_8x16b, 5);
22098d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        res2_sh_l_8x16b = _mm_srai_epi16(res2_l_8x16b, 5);
22108d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
22118d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        pu1_dst += dst_strd << 1;
22128d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
22138d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        res1_16x8b = _mm_packus_epi16(res1_sh_l_8x16b, res1_sh_h_8x16b);
22148d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        res2_16x8b = _mm_packus_epi16(res2_sh_l_8x16b, res2_sh_h_8x16b);
22158d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
22168d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        _mm_storeu_si128((__m128i *)pu1_dst, res1_16x8b);
22178d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), res2_16x8b);
22188d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
22198d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        // rows 15, 16
22208d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        res1_h_8x16b = _mm_add_epi16(res1_h_8x16b, c2_8x16b);
22218d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        res1_l_8x16b = _mm_add_epi16(res1_l_8x16b, c2_8x16b);
22228d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        res2_h_8x16b = _mm_add_epi16(res2_h_8x16b, c2_8x16b);
22238d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        res2_l_8x16b = _mm_add_epi16(res2_l_8x16b, c2_8x16b);
22248d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
22258d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        res1_sh_h_8x16b = _mm_srai_epi16(res1_h_8x16b, 5);
22268d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        res1_sh_l_8x16b = _mm_srai_epi16(res1_l_8x16b, 5);
22278d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        res2_sh_h_8x16b = _mm_srai_epi16(res2_h_8x16b, 5);
22288d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        res2_sh_l_8x16b = _mm_srai_epi16(res2_l_8x16b, 5);
22298d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
22308d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        pu1_dst += dst_strd << 1;
22318d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
22328d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        res1_16x8b = _mm_packus_epi16(res1_sh_l_8x16b, res1_sh_h_8x16b);
22338d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        res2_16x8b = _mm_packus_epi16(res2_sh_l_8x16b, res2_sh_h_8x16b);
22348d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S
22358d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        _mm_storeu_si128((__m128i *)pu1_dst, res1_16x8b);
22368d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S        _mm_storeu_si128((__m128i *)(pu1_dst + dst_strd), res2_16x8b);
22378d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S    }
22388d3d303c7942ced6a987a52db8977d768dc3605fHamsalekha S}
2239