/******************************************************************************
*
* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
******************************************************************************/
/**
*******************************************************************************
* @file
*  ihevcd_frm_cvt_x86_intr.c
*
* @brief
*  Platform specific intrinsic implementation of certain functions
*
* @author
*  Ittiam
* @par List of Functions:
*  - ihevcd_itrans_recon_dc
*  - ihevcd_fmt_conv_420sp_to_420p_ssse3
*
* @remarks
*  None
*
*******************************************************************************
*/
370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar#include "string.h"
380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar#include "ihevc_typedefs.h"
390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar#include "ihevc_defs.h"
400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar#include "ihevc_macros.h"
410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar#include "ihevc_platform_macros.h"
420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar#include "ihevcd_function_selector.h"
430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar#include <string.h>
440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar#include <immintrin.h>
450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakarvoid ihevcd_fmt_conv_420sp_to_420p_ssse3(UWORD8 *pu1_y_src,
480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                                         UWORD8 *pu1_uv_src,
490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                                         UWORD8 *pu1_y_dst,
500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                                         UWORD8 *pu1_u_dst,
510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                                         UWORD8 *pu1_v_dst,
520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                                         WORD32 wd,
530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                                         WORD32 ht,
540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                                         WORD32 src_y_strd,
550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                                         WORD32 src_uv_strd,
560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                                         WORD32 dst_y_strd,
570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                                         WORD32 dst_uv_strd,
580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                                         WORD32 is_u_first,
590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                                         WORD32 disable_luma_copy)
600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar{
610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    UWORD8 *pu1_src, *pu1_dst;
620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    UWORD8 *pu1_u_src, *pu1_v_src;
630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    WORD32 num_rows, num_cols, src_strd, dst_strd, cols, rows;
640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    WORD32 i, j;
650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    cols = 0;
670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    pu1_u_src = (UWORD8 *)pu1_uv_src;
680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    pu1_v_src = (UWORD8 *)pu1_uv_src + 1;
690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    if(0 == disable_luma_copy)
700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    {
710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar        /* copy luma */
720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar        pu1_src = (UWORD8 *)pu1_y_src;
730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar        pu1_dst = (UWORD8 *)pu1_y_dst;
740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar        num_rows = ht;
760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar        num_cols = wd;
770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar        src_strd = src_y_strd;
790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar        dst_strd = dst_y_strd;
800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar        for(i = 0; i < num_rows; i++)
810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar        {
820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar            memcpy(pu1_dst, pu1_src, num_cols);
830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar            pu1_dst += dst_strd;
840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar            pu1_src += src_strd;
850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar        }
860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    }
870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    /* de-interleave U and V and copy to destination */
890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    if(!is_u_first)
900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    {
910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar        UWORD8 *temp = pu1_u_dst;
920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar        pu1_u_dst = pu1_v_dst;
930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar        pu1_v_dst = temp;
940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar        pu1_u_src = (UWORD8 *)pu1_uv_src + 1;
960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar        pu1_v_src = (UWORD8 *)pu1_uv_src;
970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    }
980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    {
1000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar        __m128i src_uv0_8x16b, src_uv1_8x16b, src_u_8x16b, src_v_8x16b;
1010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar        __m128i temp0_8x16b, temp1_8x16b, alt_first_mask;
1020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar        UWORD8 FIRST_ALT_SHUFFLE[16] = {
1040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar            0x00, 0x02, 0x04, 0x06,
1050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar            0x08, 0x0A, 0x0C, 0x0E,
1060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar            0x01, 0x03, 0x05, 0x07,
1070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar            0x09, 0x0B, 0x0D, 0x0F };
1080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar        PREFETCH((char const *)(pu1_uv_src + (0 * src_uv_strd)), _MM_HINT_T0)
1100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar        PREFETCH((char const *)(pu1_uv_src + (1 * src_uv_strd)), _MM_HINT_T0)
1110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar        PREFETCH((char const *)(pu1_uv_src + (2 * src_uv_strd)), _MM_HINT_T0)
1120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar        PREFETCH((char const *)(pu1_uv_src + (3 * src_uv_strd)), _MM_HINT_T0)
1130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar        PREFETCH((char const *)(pu1_uv_src + (4 * src_uv_strd)), _MM_HINT_T0)
1140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar        PREFETCH((char const *)(pu1_uv_src + (5 * src_uv_strd)), _MM_HINT_T0)
1150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar        PREFETCH((char const *)(pu1_uv_src + (6 * src_uv_strd)), _MM_HINT_T0)
1160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar        PREFETCH((char const *)(pu1_uv_src + (7 * src_uv_strd)), _MM_HINT_T0)
1170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar        num_rows = ht >> 1;
1190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar        num_cols = wd >> 1;
1200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar        src_strd = src_uv_strd;
1220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar        dst_strd = dst_uv_strd;
1230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar        alt_first_mask = _mm_loadu_si128((__m128i *)&FIRST_ALT_SHUFFLE[0]);
1250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar        if(num_cols > 15)
1270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar        {
1280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar            cols = num_cols >> 4;
1290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar            for(i = 0; i < (num_rows >> 2); i++)
1310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar            {
1320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                UWORD8 *pu1_uv_src_temp, *pu1_u_dst_temp, *pu1_v_dst_temp;
1330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                PREFETCH((char const *)(pu1_uv_src + (8 * src_strd)), _MM_HINT_T0)
1350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                PREFETCH((char const *)(pu1_uv_src + (9 * src_strd)), _MM_HINT_T0)
1360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                PREFETCH((char const *)(pu1_uv_src + (10 * src_strd)), _MM_HINT_T0)
1370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                PREFETCH((char const *)(pu1_uv_src + (11 * src_strd)), _MM_HINT_T0)
1380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                pu1_uv_src_temp = pu1_uv_src;
1400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                pu1_u_dst_temp =  pu1_u_dst;
1410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                pu1_v_dst_temp =  pu1_v_dst;
1420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                for(j = 0; j < cols; j++)
1440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                {
1450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                    /**** Row 0 ***/
1470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                    src_uv0_8x16b = _mm_loadu_si128((__m128i *)pu1_uv_src_temp);
1480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                    src_uv1_8x16b = _mm_loadu_si128((__m128i *)(pu1_uv_src_temp + 16));
1490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                    temp0_8x16b = _mm_shuffle_epi8(src_uv0_8x16b, alt_first_mask);
1510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                    temp1_8x16b = _mm_shuffle_epi8(src_uv1_8x16b, alt_first_mask);
1520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                    src_u_8x16b = _mm_unpacklo_epi64(temp0_8x16b, temp1_8x16b);
1540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                    src_v_8x16b = _mm_unpackhi_epi64(temp0_8x16b, temp1_8x16b);
1550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                    _mm_storeu_si128((__m128i *)(pu1_u_dst_temp), src_u_8x16b);
1570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                    _mm_storeu_si128((__m128i *)(pu1_v_dst_temp), src_v_8x16b);
1580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                    /**** Row 1 ***/
1600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                    src_uv0_8x16b = _mm_loadu_si128((__m128i *)(pu1_uv_src_temp + (1 * src_strd)));
1610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                    src_uv1_8x16b = _mm_loadu_si128((__m128i *)(pu1_uv_src_temp + (1 * src_strd) + 16));
1620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                    temp0_8x16b = _mm_shuffle_epi8(src_uv0_8x16b, alt_first_mask);
1640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                    temp1_8x16b = _mm_shuffle_epi8(src_uv1_8x16b, alt_first_mask);
1650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                    src_u_8x16b = _mm_unpacklo_epi64(temp0_8x16b, temp1_8x16b);
1670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                    src_v_8x16b = _mm_unpackhi_epi64(temp0_8x16b, temp1_8x16b);
1680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                    _mm_storeu_si128((__m128i *)(pu1_u_dst_temp + (1 * dst_strd)), src_u_8x16b);
1700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                    _mm_storeu_si128((__m128i *)(pu1_v_dst_temp + (1 * dst_strd)), src_v_8x16b);
1710d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1720d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                    /**** Row 2 ***/
1730d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                    src_uv0_8x16b = _mm_loadu_si128((__m128i *)(pu1_uv_src_temp + (2 * src_strd)));
1740d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                    src_uv1_8x16b = _mm_loadu_si128((__m128i *)(pu1_uv_src_temp + (2 * src_strd) + 16));
1750d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1760d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                    temp0_8x16b = _mm_shuffle_epi8(src_uv0_8x16b, alt_first_mask);
1770d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                    temp1_8x16b = _mm_shuffle_epi8(src_uv1_8x16b, alt_first_mask);
1780d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1790d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                    src_u_8x16b = _mm_unpacklo_epi64(temp0_8x16b, temp1_8x16b);
1800d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                    src_v_8x16b = _mm_unpackhi_epi64(temp0_8x16b, temp1_8x16b);
1810d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1820d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                    _mm_storeu_si128((__m128i *)(pu1_u_dst_temp + (2 * dst_strd)), src_u_8x16b);
1830d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                    _mm_storeu_si128((__m128i *)(pu1_v_dst_temp + (2 * dst_strd)), src_v_8x16b);
1840d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1850d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                    /**** Row 3 ***/
1860d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                    src_uv0_8x16b = _mm_loadu_si128((__m128i *)(pu1_uv_src_temp + (3 * src_strd)));
1870d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                    src_uv1_8x16b = _mm_loadu_si128((__m128i *)(pu1_uv_src_temp + (3 * src_strd) + 16));
1880d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1890d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                    temp0_8x16b = _mm_shuffle_epi8(src_uv0_8x16b, alt_first_mask);
1900d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                    temp1_8x16b = _mm_shuffle_epi8(src_uv1_8x16b, alt_first_mask);
1910d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1920d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                    src_u_8x16b = _mm_unpacklo_epi64(temp0_8x16b, temp1_8x16b);
1930d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                    src_v_8x16b = _mm_unpackhi_epi64(temp0_8x16b, temp1_8x16b);
1940d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1950d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                    _mm_storeu_si128((__m128i *)(pu1_u_dst_temp + (3 * dst_strd)), src_u_8x16b);
1960d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                    _mm_storeu_si128((__m128i *)(pu1_v_dst_temp + (3 * dst_strd)), src_v_8x16b);
1970d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
1980d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                    pu1_u_dst_temp += 16;
1990d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                    pu1_v_dst_temp += 16;
2000d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                    pu1_uv_src_temp += 32;
2010d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                }
2020d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2030d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                pu1_u_dst += 4 * dst_strd;
2040d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                pu1_v_dst += 4 * dst_strd;
2050d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                pu1_uv_src += 4 * src_strd;
2060d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                //pu1_v_src += src_strd;
2070d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar            }
2080d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar            rows = num_rows & 0x3;
2090d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar            if(rows)
2100d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar            {
2110d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                for(i = 0; i < rows; i++)
2120d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                {
2130d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                    UWORD8 *pu1_uv_src_temp, *pu1_u_dst_temp, *pu1_v_dst_temp;
2140d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2150d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                    pu1_uv_src_temp = pu1_uv_src;
2160d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                    pu1_u_dst_temp =  pu1_u_dst;
2170d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                    pu1_v_dst_temp =  pu1_v_dst;
2180d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2190d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                    for(j = 0; j < cols; j++)
2200d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                    {
2210d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2220d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                        src_uv0_8x16b = _mm_loadu_si128((__m128i *)pu1_uv_src_temp);
2230d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                        src_uv1_8x16b = _mm_loadu_si128((__m128i *)(pu1_uv_src_temp + 16));
2240d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2250d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                        temp0_8x16b = _mm_shuffle_epi8(src_uv0_8x16b, alt_first_mask);
2260d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                        temp1_8x16b = _mm_shuffle_epi8(src_uv1_8x16b, alt_first_mask);
2270d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2280d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                        src_u_8x16b = _mm_unpacklo_epi64(temp0_8x16b, temp1_8x16b);
2290d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                        src_v_8x16b = _mm_unpackhi_epi64(temp0_8x16b, temp1_8x16b);
2300d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2310d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                        _mm_storeu_si128((__m128i *)(pu1_u_dst_temp), src_u_8x16b);
2320d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                        _mm_storeu_si128((__m128i *)(pu1_v_dst_temp), src_v_8x16b);
2330d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2340d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                        pu1_u_dst_temp += 16;
2350d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                        pu1_v_dst_temp += 16;
2360d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                        pu1_uv_src_temp += 32;
2370d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                    }
2380d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2390d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                    pu1_u_dst += dst_strd;
2400d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                    pu1_v_dst += dst_strd;
2410d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                    pu1_uv_src += src_strd;
2420d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                }
2430d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar            }
2440d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar            pu1_u_dst -= (num_rows * dst_strd);
2450d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar            pu1_v_dst -= (num_rows * dst_strd);
2460d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar            num_cols &= 0x0F;
2470d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar        }
2480d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar        if(num_cols)
2490d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar        {
2500d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar            pu1_u_dst += (cols << 4);
2510d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar            pu1_v_dst += (cols << 4);
2520d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar            pu1_u_src += 2 * (cols << 4);
2530d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar            pu1_v_src += 2 * (cols << 4);
2540d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar            for(i = 0; i < num_rows; i++)
2550d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar            {
2560d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                for(j = 0; j < num_cols; j++)
2570d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                {
2580d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                    pu1_u_dst[j] = pu1_u_src[j * 2];
2590d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                    pu1_v_dst[j] = pu1_v_src[j * 2];
2600d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                }
2610d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar
2620d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                pu1_u_dst += dst_strd;
2630d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                pu1_v_dst += dst_strd;
2640d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                pu1_u_src += src_strd;
2650d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar                pu1_v_src += src_strd;
2660d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar            }
2670d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar        }
2680d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    }
2690d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar    return;
2700d8951cef4b1a1dbf4ff5ba3e8796cf1d4503098Harish Mahendrakar}
271