/******************************************************************************
 *
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *****************************************************************************
 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/

/**
 *******************************************************************************
 * @file
 *  impeg2_inter_pred_sse42_intr.c
 *
 * @brief
 *  Contains Motion compensation function definitions for MPEG2 decoder
 *
 * @author
 *  Mohit [100664]
 *
 * @par List of Functions:
 * - impeg2_copy_mb_sse42()
 * - impeg2_interpolate_sse42()
 * - impeg2_mc_halfx_halfy_8x8_sse42()
 * - impeg2_mc_halfx_fully_8x8_sse42()
 * - impeg2_mc_fullx_halfy_8x8_sse42()
 * - impeg2_mc_fullx_fully_8x8_sse42()
 *
 * @remarks
 *  None
 *
******************************************************************************* 43aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani */ 44aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani#include <stdio.h> 45aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani#include <string.h> 46aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani#include "iv_datatypedef.h" 47aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani#include "impeg2_macros.h" 48aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani#include "impeg2_defs.h" 49aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani#include "impeg2_inter_pred.h" 50aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 51aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani#include <immintrin.h> 52aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani#include <emmintrin.h> 53aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani#include <smmintrin.h> 54aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani#include <tmmintrin.h> 55aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 56aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/******************************************************************************* 57aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani* Function Name : impeg2_copy_mb 58aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani* 59aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani* Description : copies 3 components to the frame from mc_buf 60aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani* 61aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani* Arguments : 62aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani* src_buf : Source Buffer 63aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani* dst_buf : Destination Buffer 64aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani* src_wd : Source Width 
65aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani* dst_wd : destination Width 66aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani* 67aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani* Values Returned : None 68aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani*******************************************************************************/ 69aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhanivoid impeg2_copy_mb_sse42(yuv_buf_t *src_buf, 70aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani yuv_buf_t *dst_buf, 71aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani UWORD32 src_wd, 72aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani UWORD32 dst_wd) 73aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani{ 74aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani UWORD8 *src; 75aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani UWORD8 *dst; 76aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani __m128i src_r0, src_r1, src_r2, src_r3; 77aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 78aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani /*******************************************************/ 79aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani /* copy Y */ 80aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani /*******************************************************/ 81aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src = src_buf->pu1_y; 82aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani dst = dst_buf->pu1_y; 83aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani // Row 0-3 84aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r0 = _mm_loadu_si128((__m128i *) (src)); 85aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r1 = _mm_loadu_si128((__m128i *) (src + src_wd)); 86aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r2 = _mm_loadu_si128((__m128i 
*) (src + 2 * src_wd)); 87aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r3 = _mm_loadu_si128((__m128i *) (src + 3 * src_wd)); 88aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 89aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani _mm_storeu_si128((__m128i *) dst, src_r0); 90aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani _mm_storeu_si128((__m128i *) (dst + dst_wd), src_r1); 91aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani _mm_storeu_si128((__m128i *) (dst + 2 * dst_wd), src_r2); 92aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani _mm_storeu_si128((__m128i *) (dst + 3 * dst_wd), src_r3); 93aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 94aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani // Row 4-7 95aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src += 4 * src_wd; 96aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani dst += 4 * dst_wd; 97aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r0 = _mm_loadu_si128((__m128i *) (src)); 98aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r1 = _mm_loadu_si128((__m128i *) (src + src_wd)); 99aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r2 = _mm_loadu_si128((__m128i *) (src + 2 * src_wd)); 100aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r3 = _mm_loadu_si128((__m128i *) (src + 3 * src_wd)); 101aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 102aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani _mm_storeu_si128((__m128i *) dst, src_r0); 103aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani _mm_storeu_si128((__m128i *) (dst + dst_wd), src_r1); 104aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani _mm_storeu_si128((__m128i *) (dst + 2 * dst_wd), src_r2); 105aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani _mm_storeu_si128((__m128i *) (dst + 3 * dst_wd), src_r3); 
106aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 107aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani // Row 8-11 108aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src += 4 * src_wd; 109aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani dst += 4 * dst_wd; 110aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r0 = _mm_loadu_si128((__m128i *) (src)); 111aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r1 = _mm_loadu_si128((__m128i *) (src + src_wd)); 112aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r2 = _mm_loadu_si128((__m128i *) (src + 2 * src_wd)); 113aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r3 = _mm_loadu_si128((__m128i *) (src + 3 * src_wd)); 114aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 115aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani _mm_storeu_si128((__m128i *) dst, src_r0); 116aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani _mm_storeu_si128((__m128i *) (dst + dst_wd), src_r1); 117aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani _mm_storeu_si128((__m128i *) (dst + 2 * dst_wd), src_r2); 118aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani _mm_storeu_si128((__m128i *) (dst + 3 * dst_wd), src_r3); 119aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 120aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani // Row 12-15 121aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src += 4 * src_wd; 122aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani dst += 4 * dst_wd; 123aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r0 = _mm_loadu_si128((__m128i *) (src)); 124aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r1 = _mm_loadu_si128((__m128i *) (src + src_wd)); 125aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r2 = _mm_loadu_si128((__m128i *) (src + 2 * src_wd)); 
126aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r3 = _mm_loadu_si128((__m128i *) (src + 3 * src_wd)); 127aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 128aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani _mm_storeu_si128((__m128i *) dst, src_r0); 129aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani _mm_storeu_si128((__m128i *) (dst + dst_wd), src_r1); 130aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani _mm_storeu_si128((__m128i *) (dst + 2 * dst_wd), src_r2); 131aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani _mm_storeu_si128((__m128i *) (dst + 3 * dst_wd), src_r3); 132aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 133aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_wd >>= 1; 134aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani dst_wd >>= 1; 135aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 136aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani /*******************************************************/ 137aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani /* copy U */ 138aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani /*******************************************************/ 139aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src = src_buf->pu1_u; 140aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani dst = dst_buf->pu1_u; 141aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 142aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani // Row 0-3 143aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r0 = _mm_loadl_epi64((__m128i *)src); 144aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r1 = _mm_loadl_epi64((__m128i *)(src + src_wd)); 145aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r2 = _mm_loadl_epi64((__m128i *)(src + 2 * src_wd)); 146aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r3 = 
_mm_loadl_epi64((__m128i *)(src + 3 * src_wd)); 147aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 148aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani _mm_storel_epi64((__m128i *)dst, src_r0); 149aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani _mm_storel_epi64((__m128i *)(dst + dst_wd), src_r1); 150aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani _mm_storel_epi64((__m128i *)(dst + 2 * dst_wd), src_r2); 151aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani _mm_storel_epi64((__m128i *)(dst + 3 * dst_wd), src_r3); 152aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 153aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani // Row 4-7 154aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src += 4 * src_wd; 155aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani dst += 4 * dst_wd; 156aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 157aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r0 = _mm_loadl_epi64((__m128i *)src); 158aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r1 = _mm_loadl_epi64((__m128i *)(src + src_wd)); 159aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r2 = _mm_loadl_epi64((__m128i *)(src + 2 * src_wd)); 160aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r3 = _mm_loadl_epi64((__m128i *)(src + 3 * src_wd)); 161aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 162aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani _mm_storel_epi64((__m128i *)dst, src_r0); 163aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani _mm_storel_epi64((__m128i *)(dst + dst_wd), src_r1); 164aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani _mm_storel_epi64((__m128i *)(dst + 2 * dst_wd), src_r2); 165aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani _mm_storel_epi64((__m128i *)(dst + 3 * dst_wd), src_r3); 166aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama 
Avadhani 167aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani /*******************************************************/ 168aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani /* copy V */ 169aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani /*******************************************************/ 170aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src = src_buf->pu1_v; 171aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani dst = dst_buf->pu1_v; 172aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani // Row 0-3 173aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r0 = _mm_loadl_epi64((__m128i *)src); 174aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r1 = _mm_loadl_epi64((__m128i *)(src + src_wd)); 175aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r2 = _mm_loadl_epi64((__m128i *)(src + 2 * src_wd)); 176aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r3 = _mm_loadl_epi64((__m128i *)(src + 3 * src_wd)); 177aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 178aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani _mm_storel_epi64((__m128i *)dst, src_r0); 179aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani _mm_storel_epi64((__m128i *)(dst + dst_wd), src_r1); 180aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani _mm_storel_epi64((__m128i *)(dst + 2 * dst_wd), src_r2); 181aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani _mm_storel_epi64((__m128i *)(dst + 3 * dst_wd), src_r3); 182aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 183aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani // Row 4-7 184aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src += 4 * src_wd; 185aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani dst += 4 * dst_wd; 186aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 187aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama 
Avadhani src_r0 = _mm_loadl_epi64((__m128i *)src); 188aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r1 = _mm_loadl_epi64((__m128i *)(src + src_wd)); 189aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r2 = _mm_loadl_epi64((__m128i *)(src + 2 * src_wd)); 190aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r3 = _mm_loadl_epi64((__m128i *)(src + 3 * src_wd)); 191aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 192aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani _mm_storel_epi64((__m128i *)dst, src_r0); 193aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani _mm_storel_epi64((__m128i *)(dst + dst_wd), src_r1); 194aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani _mm_storel_epi64((__m128i *)(dst + 2 * dst_wd), src_r2); 195aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani _mm_storel_epi64((__m128i *)(dst + 3 * dst_wd), src_r3); 196aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani} 197aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 198aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/*****************************************************************************/ 199aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* */ 200aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* Function Name : impeg2_interpolate */ 201aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* */ 202aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* Description : averages the contents of buf_src1 and buf_src2 and stores*/ 203aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* result in buf_dst */ 204aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* */ 205aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* Inputs : buf_src1 - First Source */ 206aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* buf_src2 - Second Source */ 207aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama 
Avadhani/* */ 208aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* Globals : None */ 209aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* */ 210aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* Processing : Avg the values from two sources and store the result in */ 211aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* destination buffer */ 212aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* */ 213aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* Outputs : buf_dst - Avg of contents of buf_src1 and buf_src2 */ 214aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* */ 215aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* Returns : None */ 216aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* */ 217aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* Issues : Assumes that all 3 buffers are of same size */ 218aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* */ 219aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/*****************************************************************************/ 220aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhanivoid impeg2_interpolate_sse42(yuv_buf_t *buf_src1, 221aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani yuv_buf_t *buf_src2, 222aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani yuv_buf_t *buf_dst, 223aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani UWORD32 stride) 224aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani{ 225aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani UWORD8 *src1, *src2; 226aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani UWORD8 *dst; 227aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani __m128i src1_r0, src1_r1, src1_r2, src1_r3; 228aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani __m128i src2_r0, src2_r1, src2_r2, src2_r3; 
229aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 230aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani /*******************************************************/ 231aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani /* interpolate Y */ 232aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani /*******************************************************/ 233aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src1 = buf_src1->pu1_y; 234aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src2 = buf_src2->pu1_y; 235aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani dst = buf_dst->pu1_y; 236aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani // Row 0-3 237aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src1_r0 = _mm_loadu_si128((__m128i *) (src1)); 238aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src1_r1 = _mm_loadu_si128((__m128i *) (src1 + 16)); 239aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src1_r2 = _mm_loadu_si128((__m128i *) (src1 + 2 * 16)); 240aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src1_r3 = _mm_loadu_si128((__m128i *) (src1 + 3 * 16)); 241aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 242aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src2_r0 = _mm_loadu_si128((__m128i *) (src2)); 243aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src2_r1 = _mm_loadu_si128((__m128i *) (src2 + 16)); 244aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src2_r2 = _mm_loadu_si128((__m128i *) (src2 + 2 * 16)); 245aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src2_r3 = _mm_loadu_si128((__m128i *) (src2 + 3 * 16)); 246aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 247aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src1_r0 = _mm_avg_epu8 (src1_r0, src2_r0); 248aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src1_r1 = _mm_avg_epu8 (src1_r1, 
src2_r1); 249aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src1_r2 = _mm_avg_epu8 (src1_r2, src2_r2); 250aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src1_r3 = _mm_avg_epu8 (src1_r3, src2_r3); 251aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 252aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani _mm_storeu_si128((__m128i *) dst, src1_r0); 253aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani _mm_storeu_si128((__m128i *) (dst + stride), src1_r1); 254aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani _mm_storeu_si128((__m128i *) (dst + 2 * stride), src1_r2); 255aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani _mm_storeu_si128((__m128i *) (dst + 3 * stride), src1_r3); 256aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 257aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani // Row 4-7 258aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src1 += 4 * 16; 259aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src2 += 4 * 16; 260aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani dst += 4 * stride; 261aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src1_r0 = _mm_loadu_si128((__m128i *) (src1)); 262aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src1_r1 = _mm_loadu_si128((__m128i *) (src1 + 16)); 263aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src1_r2 = _mm_loadu_si128((__m128i *) (src1 + 2 * 16)); 264aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src1_r3 = _mm_loadu_si128((__m128i *) (src1 + 3 * 16)); 265aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 266aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src2_r0 = _mm_loadu_si128((__m128i *) (src2)); 267aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src2_r1 = _mm_loadu_si128((__m128i *) (src2 + 16)); 268aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src2_r2 = _mm_loadu_si128((__m128i 
*) (src2 + 2 * 16)); 269aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src2_r3 = _mm_loadu_si128((__m128i *) (src2 + 3 * 16)); 270aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 271aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src1_r0 = _mm_avg_epu8 (src1_r0, src2_r0); 272aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src1_r1 = _mm_avg_epu8 (src1_r1, src2_r1); 273aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src1_r2 = _mm_avg_epu8 (src1_r2, src2_r2); 274aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src1_r3 = _mm_avg_epu8 (src1_r3, src2_r3); 275aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 276aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani _mm_storeu_si128((__m128i *) dst, src1_r0); 277aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani _mm_storeu_si128((__m128i *) (dst + stride), src1_r1); 278aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani _mm_storeu_si128((__m128i *) (dst + 2 * stride), src1_r2); 279aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani _mm_storeu_si128((__m128i *) (dst + 3 * stride), src1_r3); 280aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 281aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani // Row 8-11 282aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src1 += 4 * 16; 283aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src2 += 4 * 16; 284aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani dst += 4 * stride; 285aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src1_r0 = _mm_loadu_si128((__m128i *) (src1)); 286aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src1_r1 = _mm_loadu_si128((__m128i *) (src1 + 16)); 287aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src1_r2 = _mm_loadu_si128((__m128i *) (src1 + 2 * 16)); 288aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src1_r3 = _mm_loadu_si128((__m128i 
*) (src1 + 3 * 16)); 289aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 290aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src2_r0 = _mm_loadu_si128((__m128i *) (src2)); 291aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src2_r1 = _mm_loadu_si128((__m128i *) (src2 + 16)); 292aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src2_r2 = _mm_loadu_si128((__m128i *) (src2 + 2 * 16)); 293aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src2_r3 = _mm_loadu_si128((__m128i *) (src2 + 3 * 16)); 294aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 295aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src1_r0 = _mm_avg_epu8 (src1_r0, src2_r0); 296aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src1_r1 = _mm_avg_epu8 (src1_r1, src2_r1); 297aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src1_r2 = _mm_avg_epu8 (src1_r2, src2_r2); 298aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src1_r3 = _mm_avg_epu8 (src1_r3, src2_r3); 299aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 300aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani _mm_storeu_si128((__m128i *) dst, src1_r0); 301aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani _mm_storeu_si128((__m128i *) (dst + stride), src1_r1); 302aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani _mm_storeu_si128((__m128i *) (dst + 2 * stride), src1_r2); 303aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani _mm_storeu_si128((__m128i *) (dst + 3 * stride), src1_r3); 304aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 305aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani // Row 12-15 306aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src1 += 4 * 16; 307aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src2 += 4 * 16; 308aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani dst += 4 * stride; 
309aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src1_r0 = _mm_loadu_si128((__m128i *) (src1)); 310aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src1_r1 = _mm_loadu_si128((__m128i *) (src1 + 16)); 311aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src1_r2 = _mm_loadu_si128((__m128i *) (src1 + 2 * 16)); 312aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src1_r3 = _mm_loadu_si128((__m128i *) (src1 + 3 * 16)); 313aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 314aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src2_r0 = _mm_loadu_si128((__m128i *) (src2)); 315aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src2_r1 = _mm_loadu_si128((__m128i *) (src2 + 16)); 316aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src2_r2 = _mm_loadu_si128((__m128i *) (src2 + 2 * 16)); 317aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src2_r3 = _mm_loadu_si128((__m128i *) (src2 + 3 * 16)); 318aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 319aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src1_r0 = _mm_avg_epu8 (src1_r0, src2_r0); 320aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src1_r1 = _mm_avg_epu8 (src1_r1, src2_r1); 321aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src1_r2 = _mm_avg_epu8 (src1_r2, src2_r2); 322aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src1_r3 = _mm_avg_epu8 (src1_r3, src2_r3); 323aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 324aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani _mm_storeu_si128((__m128i *) dst, src1_r0); 325aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani _mm_storeu_si128((__m128i *) (dst + stride), src1_r1); 326aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani _mm_storeu_si128((__m128i *) (dst + 2 * stride), src1_r2); 327aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani _mm_storeu_si128((__m128i *) 
(dst + 3 * stride), src1_r3); 328aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 329aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani stride >>= 1; 330aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 331aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani /*******************************************************/ 332aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani /* interpolate U */ 333aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani /*******************************************************/ 334aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src1 = buf_src1->pu1_u; 335aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src2 = buf_src2->pu1_u; 336aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani dst = buf_dst->pu1_u; 337aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani // Row 0-3 338aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src1_r0 = _mm_loadl_epi64((__m128i *) (src1)); 339aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src1_r1 = _mm_loadl_epi64((__m128i *) (src1 + 8)); 340aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src1_r2 = _mm_loadl_epi64((__m128i *) (src1 + 2 * 8)); 341aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src1_r3 = _mm_loadl_epi64((__m128i *) (src1 + 3 * 8)); 342aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 343aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src2_r0 = _mm_loadl_epi64((__m128i *) (src2)); 344aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src2_r1 = _mm_loadl_epi64((__m128i *) (src2 + 8)); 345aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src2_r2 = _mm_loadl_epi64((__m128i *) (src2 + 2 * 8)); 346aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src2_r3 = _mm_loadl_epi64((__m128i *) (src2 + 3 * 8)); 347aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 
348aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src1_r0 = _mm_avg_epu8 (src1_r0, src2_r0); 349aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src1_r1 = _mm_avg_epu8 (src1_r1, src2_r1); 350aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src1_r2 = _mm_avg_epu8 (src1_r2, src2_r2); 351aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src1_r3 = _mm_avg_epu8 (src1_r3, src2_r3); 352aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 353aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani _mm_storel_epi64((__m128i *) dst, src1_r0); 354aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani _mm_storel_epi64((__m128i *) (dst + stride), src1_r1); 355aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani _mm_storel_epi64((__m128i *) (dst + 2 * stride), src1_r2); 356aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani _mm_storel_epi64((__m128i *) (dst + 3 * stride), src1_r3); 357aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 358aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani // Row 4-7 359aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src1 += 4 * 8; 360aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src2 += 4 * 8; 361aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani dst += 4 * stride; 362aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 363aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src1_r0 = _mm_loadl_epi64((__m128i *) (src1)); 364aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src1_r1 = _mm_loadl_epi64((__m128i *) (src1 + 8)); 365aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src1_r2 = _mm_loadl_epi64((__m128i *) (src1 + 2 * 8)); 366aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src1_r3 = _mm_loadl_epi64((__m128i *) (src1 + 3 * 8)); 367aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 368aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama 
Avadhani src2_r0 = _mm_loadl_epi64((__m128i *) (src2)); 369aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src2_r1 = _mm_loadl_epi64((__m128i *) (src2 + 8)); 370aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src2_r2 = _mm_loadl_epi64((__m128i *) (src2 + 2 * 8)); 371aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src2_r3 = _mm_loadl_epi64((__m128i *) (src2 + 3 * 8)); 372aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 373aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src1_r0 = _mm_avg_epu8 (src1_r0, src2_r0); 374aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src1_r1 = _mm_avg_epu8 (src1_r1, src2_r1); 375aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src1_r2 = _mm_avg_epu8 (src1_r2, src2_r2); 376aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src1_r3 = _mm_avg_epu8 (src1_r3, src2_r3); 377aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 378aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani _mm_storel_epi64((__m128i *) dst, src1_r0); 379aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani _mm_storel_epi64((__m128i *) (dst + stride), src1_r1); 380aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani _mm_storel_epi64((__m128i *) (dst + 2 * stride), src1_r2); 381aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani _mm_storel_epi64((__m128i *) (dst + 3 * stride), src1_r3); 382aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 383aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani /*******************************************************/ 384aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani /* interpolate V */ 385aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani /*******************************************************/ 386aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src1 = buf_src1->pu1_v; 387aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src2 = 
buf_src2->pu1_v; 388aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani dst = buf_dst->pu1_v; 389aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 390aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani // Row 0-3 391aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src1_r0 = _mm_loadl_epi64((__m128i *) (src1)); 392aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src1_r1 = _mm_loadl_epi64((__m128i *) (src1 + 8)); 393aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src1_r2 = _mm_loadl_epi64((__m128i *) (src1 + 2 * 8)); 394aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src1_r3 = _mm_loadl_epi64((__m128i *) (src1 + 3 * 8)); 395aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 396aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src2_r0 = _mm_loadl_epi64((__m128i *) (src2)); 397aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src2_r1 = _mm_loadl_epi64((__m128i *) (src2 + 8)); 398aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src2_r2 = _mm_loadl_epi64((__m128i *) (src2 + 2 * 8)); 399aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src2_r3 = _mm_loadl_epi64((__m128i *) (src2 + 3 * 8)); 400aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 401aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src1_r0 = _mm_avg_epu8 (src1_r0, src2_r0); 402aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src1_r1 = _mm_avg_epu8 (src1_r1, src2_r1); 403aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src1_r2 = _mm_avg_epu8 (src1_r2, src2_r2); 404aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src1_r3 = _mm_avg_epu8 (src1_r3, src2_r3); 405aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 406aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani _mm_storel_epi64((__m128i *) dst, src1_r0); 407aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani _mm_storel_epi64((__m128i *) (dst + 
stride), src1_r1); 408aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani _mm_storel_epi64((__m128i *) (dst + 2 * stride), src1_r2); 409aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani _mm_storel_epi64((__m128i *) (dst + 3 * stride), src1_r3); 410aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 411aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani // Row 4-7 412aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src1 += 4 * 8; 413aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src2 += 4 * 8; 414aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani dst += 4 * stride; 415aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 416aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src1_r0 = _mm_loadl_epi64((__m128i *) (src1)); 417aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src1_r1 = _mm_loadl_epi64((__m128i *) (src1 + 8)); 418aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src1_r2 = _mm_loadl_epi64((__m128i *) (src1 + 2 * 8)); 419aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src1_r3 = _mm_loadl_epi64((__m128i *) (src1 + 3 * 8)); 420aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 421aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src2_r0 = _mm_loadl_epi64((__m128i *) (src2)); 422aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src2_r1 = _mm_loadl_epi64((__m128i *) (src2 + 8)); 423aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src2_r2 = _mm_loadl_epi64((__m128i *) (src2 + 2 * 8)); 424aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src2_r3 = _mm_loadl_epi64((__m128i *) (src2 + 3 * 8)); 425aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 426aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src1_r0 = _mm_avg_epu8 (src1_r0, src2_r0); 427aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src1_r1 = _mm_avg_epu8 (src1_r1, src2_r1); 
428aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src1_r2 = _mm_avg_epu8 (src1_r2, src2_r2); 429aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src1_r3 = _mm_avg_epu8 (src1_r3, src2_r3); 430aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 431aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani _mm_storel_epi64((__m128i *) dst, src1_r0); 432aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani _mm_storel_epi64((__m128i *) (dst + stride), src1_r1); 433aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani _mm_storel_epi64((__m128i *) (dst + 2 * stride), src1_r2); 434aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani _mm_storel_epi64((__m128i *) (dst + 3 * stride), src1_r3); 435aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani} 436aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 437aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/*****************************************************************************/ 438aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* */ 439aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* Function Name : impeg2_mc_halfx_halfy_8x8_sse42() */ 440aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* */ 441aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* Description : Gets the buffer from (0.5,0.5) to (8.5,8.5) */ 442aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* and the above block of size 8 x 8 will be placed as a */ 443aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* block from the current position of out_buf */ 444aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* */ 445aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* Inputs : ref - Reference frame from which the block will be */ 446aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* block will be extracted. 
*/ 447aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* ref_wid - WIdth of reference frame */ 448aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* out_wid - WIdth of the output frame */ 449aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* blk_width - width of the block */ 450aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* blk_width - height of the block */ 451aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* */ 452aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* Globals : None */ 453aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* */ 454aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* Processing : Point to the (0,0),(1,0),(0,1),(1,1) position in */ 455aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* the ref frame.Interpolate these four values to get the */ 456aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* value at(0.5,0.5).Repeat this to get an 8 x 8 block */ 457aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* using 9 x 9 block from reference frame */ 458aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* */ 459aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* Outputs : out - Output containing the extracted block */ 460aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* */ 461aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* Returns : None */ 462aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* */ 463aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* Issues : None */ 464aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* */ 465aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/*****************************************************************************/ 466aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhanivoid impeg2_mc_halfx_halfy_8x8_sse42(UWORD8 *out, 467aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama 
Avadhani UWORD8 *ref, 468aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani UWORD32 ref_wid, 469aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani UWORD32 out_wid) 470aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani{ 471aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani UWORD8 *ref_p0,*ref_p1,*ref_p2,*ref_p3; 472aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani /* P0-P3 are the pixels in the reference frame and Q is the value being */ 473aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani /* estimated */ 474aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani /* 475aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani P0 P1 476aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani Q 477aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani P2 P3 478aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani */ 479aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani __m128i src_r0, src_r0_1, src_r1, src_r1_1; 480aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani __m128i tmp0, tmp1; 481aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani __m128i value_2 = _mm_set1_epi16(2); 482aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 483aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani ref_p0 = ref; 484aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani ref_p1 = ref + 1; 485aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani ref_p2 = ref + ref_wid; 486aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani ref_p3 = ref + ref_wid + 1; 487aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 488aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r0 = _mm_loadl_epi64((__m128i *) (ref_p0)); //Row 0 489aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r0_1 = _mm_loadl_epi64((__m128i *) (ref_p1)); 490aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r1 = 
_mm_loadl_epi64((__m128i *) (ref_p2)); //Row 1 491aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r1_1 = _mm_loadl_epi64((__m128i *) (ref_p3)); 492aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 493aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r0 = _mm_cvtepu8_epi16(src_r0); 494aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r0_1 = _mm_cvtepu8_epi16(src_r0_1); 495aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r1 = _mm_cvtepu8_epi16(src_r1); 496aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r1_1 = _mm_cvtepu8_epi16(src_r1_1); 497aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 498aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani tmp0 = _mm_add_epi16(src_r0, src_r0_1); //Row 0 horizontal interpolation 499aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani tmp1 = _mm_add_epi16(src_r1, src_r1_1); //Row 1 horizontal interpolation 500aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani tmp0 = _mm_add_epi16(tmp0, tmp1); //Row 0 vertical interpolation 501aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani tmp0 = _mm_add_epi16(tmp0, value_2); 502aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani tmp0 = _mm_srli_epi16(tmp0, 2); 503aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani tmp0 = _mm_packus_epi16(tmp0, value_2); 504aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 505aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani _mm_storel_epi64((__m128i *)out, tmp0); 506aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 507aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani //Row 1 508aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani ref_p2 += ref_wid; 509aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani ref_p3 += ref_wid; 510aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani out += out_wid; 
511aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 512aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r0 = _mm_loadl_epi64((__m128i *) (ref_p2)); //Row 2 513aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r0_1 = _mm_loadl_epi64((__m128i *) (ref_p3)); 514aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 515aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r0 = _mm_cvtepu8_epi16(src_r0); 516aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r0_1 = _mm_cvtepu8_epi16(src_r0_1); 517aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 518aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani tmp0 = _mm_add_epi16(src_r0, src_r0_1); //Row 2 horizontal interpolation 519aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani tmp1 = _mm_add_epi16(tmp0, tmp1); //Row 1 vertical interpolation 520aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani tmp1 = _mm_add_epi16(tmp1, value_2); 521aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani tmp1 = _mm_srli_epi16(tmp1, 2); 522aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani tmp1 = _mm_packus_epi16(tmp1, value_2); 523aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 524aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani _mm_storel_epi64((__m128i *)out, tmp1); 525aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 526aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani //Row 2 527aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani ref_p2 += ref_wid; 528aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani ref_p3 += ref_wid; 529aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani out += out_wid; 530aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 531aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r0 = _mm_loadl_epi64((__m128i *) (ref_p2)); //Row 3 532aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 
src_r0_1 = _mm_loadl_epi64((__m128i *) (ref_p3)); 533aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 534aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r0 = _mm_cvtepu8_epi16(src_r0); 535aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r0_1 = _mm_cvtepu8_epi16(src_r0_1); 536aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 537aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani tmp1 = _mm_add_epi16(src_r0, src_r0_1); //Row 3 horizontal interpolation 538aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 539aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani tmp0 = _mm_add_epi16(tmp0, tmp1); //Row 2 vertical interpolation 540aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani tmp0 = _mm_add_epi16(tmp0, value_2); 541aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani tmp0 = _mm_srli_epi16(tmp0, 2); 542aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani tmp0 = _mm_packus_epi16(tmp0, value_2); 543aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 544aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani _mm_storel_epi64((__m128i *)out, tmp0); 545aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 546aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani //Row 3 547aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani ref_p2 += ref_wid; 548aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani ref_p3 += ref_wid; 549aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani out += out_wid; 550aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 551aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r0 = _mm_loadl_epi64((__m128i *) (ref_p2)); //Row 4 552aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r0_1 = _mm_loadl_epi64((__m128i *) (ref_p3)); 553aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 554aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r0 = 
_mm_cvtepu8_epi16(src_r0); 555aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r0_1 = _mm_cvtepu8_epi16(src_r0_1); 556aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 557aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani tmp0 = _mm_add_epi16(src_r0, src_r0_1); //Row 4 horizontal interpolation 558aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 559aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani tmp1 = _mm_add_epi16(tmp0, tmp1); //Row 3 vertical interpolation 560aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani tmp1 = _mm_add_epi16(tmp1, value_2); 561aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani tmp1 = _mm_srli_epi16(tmp1, 2); 562aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani tmp1 = _mm_packus_epi16(tmp1, value_2); 563aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 564aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani _mm_storel_epi64((__m128i *)out, tmp1); 565aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 566aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani //Row 4 567aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani ref_p2 += ref_wid; 568aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani ref_p3 += ref_wid; 569aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani out += out_wid; 570aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 571aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r0 = _mm_loadl_epi64((__m128i *) (ref_p2)); //Row 5 572aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r0_1 = _mm_loadl_epi64((__m128i *) (ref_p3)); 573aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 574aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r0 = _mm_cvtepu8_epi16(src_r0); 575aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r0_1 = _mm_cvtepu8_epi16(src_r0_1); 576aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama 
Avadhani 577aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani tmp1 = _mm_add_epi16(src_r0, src_r0_1); //Row 5 horizontal interpolation 578aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 579aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani tmp0 = _mm_add_epi16(tmp0, tmp1); //Row 4 vertical interpolation 580aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani tmp0 = _mm_add_epi16(tmp0, value_2); 581aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani tmp0 = _mm_srli_epi16(tmp0, 2); 582aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani tmp0 = _mm_packus_epi16(tmp0, value_2); 583aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 584aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani _mm_storel_epi64((__m128i *)out, tmp0); 585aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 586aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani //Row 5 587aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani ref_p2 += ref_wid; 588aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani ref_p3 += ref_wid; 589aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani out += out_wid; 590aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 591aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r0 = _mm_loadl_epi64((__m128i *) (ref_p2)); //Row 6 592aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r0_1 = _mm_loadl_epi64((__m128i *) (ref_p3)); 593aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 594aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r0 = _mm_cvtepu8_epi16(src_r0); 595aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r0_1 = _mm_cvtepu8_epi16(src_r0_1); 596aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 597aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani tmp0 = _mm_add_epi16(src_r0, src_r0_1); //Row 6 horizontal interpolation 
598aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 599aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani tmp1 = _mm_add_epi16(tmp0, tmp1); //Row 5 vertical interpolation 600aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani tmp1 = _mm_add_epi16(tmp1, value_2); 601aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani tmp1 = _mm_srli_epi16(tmp1, 2); 602aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani tmp1 = _mm_packus_epi16(tmp1, value_2); 603aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 604aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani _mm_storel_epi64((__m128i *)out, tmp1); 605aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 606aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani //Row 6 607aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani ref_p2 += ref_wid; 608aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani ref_p3 += ref_wid; 609aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani out += out_wid; 610aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 611aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r0 = _mm_loadl_epi64((__m128i *) (ref_p2)); //Row 7 612aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r0_1 = _mm_loadl_epi64((__m128i *) (ref_p3)); 613aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 614aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r0 = _mm_cvtepu8_epi16(src_r0); 615aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r0_1 = _mm_cvtepu8_epi16(src_r0_1); 616aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 617aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani tmp1 = _mm_add_epi16(src_r0, src_r0_1); //Row 7 horizontal interpolation 618aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 619aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani tmp0 = _mm_add_epi16(tmp0, tmp1); //Row 6 vertical 
interpolation 620aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani tmp0 = _mm_add_epi16(tmp0, value_2); 621aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani tmp0 = _mm_srli_epi16(tmp0, 2); 622aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani tmp0 = _mm_packus_epi16(tmp0, value_2); 623aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 624aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani _mm_storel_epi64((__m128i *)out, tmp0); 625aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 626aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani //Row 7 627aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani ref_p2 += ref_wid; 628aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani ref_p3 += ref_wid; 629aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani out += out_wid; 630aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 631aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r0 = _mm_loadl_epi64((__m128i *) (ref_p2)); //Row 8 632aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r0_1 = _mm_loadl_epi64((__m128i *) (ref_p3)); 633aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 634aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r0 = _mm_cvtepu8_epi16(src_r0); 635aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r0_1 = _mm_cvtepu8_epi16(src_r0_1); 636aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 637aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani tmp0 = _mm_add_epi16(src_r0, src_r0_1); //Row 8 horizontal interpolation 638aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 639aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani tmp1 = _mm_add_epi16(tmp0, tmp1); //Row 7 vertical interpolation 640aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani tmp1 = _mm_add_epi16(tmp1, value_2); 641aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani tmp1 = 
_mm_srli_epi16(tmp1, 2); 642aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani tmp1 = _mm_packus_epi16(tmp1, value_2); 643aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 644aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani _mm_storel_epi64((__m128i *)out, tmp1); 645aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 646aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani return; 647aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani} 648aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 649aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/*****************************************************************************/ 650aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* */ 651aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* Function Name : impeg2_mc_halfx_fully_8x8_sse42() */ 652aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* */ 653aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* Description : Gets the buffer from (0.5,0) to (8.5,8) */ 654aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* and the above block of size 8 x 8 will be placed as a */ 655aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* block from the current position of out_buf */ 656aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* */ 657aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* Inputs : ref - Reference frame from which the block will be */ 658aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* block will be extracted. 
*/ 659aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* ref_wid - WIdth of reference frame */ 660aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* out_wid - WIdth of the output frame */ 661aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* blk_width - width of the block */ 662aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* blk_width - height of the block */ 663aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* */ 664aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* Globals : None */ 665aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* */ 666aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* Processing : Point to the (0,0) and (1,0) position in the ref frame */ 667aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* Interpolate these two values to get the value at(0.5,0) */ 668aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* Repeat this to get an 8 x 8 block using 9 x 8 block from */ 669aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* reference frame */ 670aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* */ 671aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* Outputs : out - Output containing the extracted block */ 672aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* */ 673aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* Returns : None */ 674aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* */ 675aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* Issues : None */ 676aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* */ 677aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/*****************************************************************************/ 678aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhanivoid impeg2_mc_halfx_fully_8x8_sse42(UWORD8 *out, 679aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 
UWORD8 *ref, 680aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani UWORD32 ref_wid, 681aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani UWORD32 out_wid) 682aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani{ 683aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani UWORD8 *ref_p0,*ref_p1; 684aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani __m128i src_r0, src_r0_1, src_r1, src_r1_1; 685aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani /* P0-P3 are the pixels in the reference frame and Q is the value being */ 686aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani /* estimated */ 687aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani /* 688aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani P0 Q P1 689aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani */ 690aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 691aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani ref_p0 = ref; 692aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani ref_p1 = ref + 1; 693aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 694aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani // Row 0 and 1 695aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r0 = _mm_loadl_epi64((__m128i *) (ref_p0)); //Row 0 696aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r0_1 = _mm_loadl_epi64((__m128i *) (ref_p1)); 697aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r1 = _mm_loadl_epi64((__m128i *) (ref_p0 + ref_wid)); //Row 1 698aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r1_1 = _mm_loadl_epi64((__m128i *) (ref_p1 + ref_wid)); 699aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 700aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r0 = _mm_avg_epu8(src_r0, src_r0_1); 701aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r1 = _mm_avg_epu8(src_r1, src_r1_1); 
702aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 703aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani _mm_storel_epi64((__m128i *)out, src_r0); 704aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani _mm_storel_epi64((__m128i *)(out + out_wid), src_r1); 705aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 706aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani // Row 2 and 3 707aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani ref_p0 += 2*ref_wid; 708aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani ref_p1 += 2*ref_wid; 709aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani out += 2*out_wid; 710aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 711aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r0 = _mm_loadl_epi64((__m128i *) (ref_p0)); //Row 2 712aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r0_1 = _mm_loadl_epi64((__m128i *) (ref_p1)); 713aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r1 = _mm_loadl_epi64((__m128i *) (ref_p0 + ref_wid)); //Row 3 714aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r1_1 = _mm_loadl_epi64((__m128i *) (ref_p1 + ref_wid)); 715aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 716aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r0 = _mm_avg_epu8(src_r0, src_r0_1); 717aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r1 = _mm_avg_epu8(src_r1, src_r1_1); 718aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 719aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani _mm_storel_epi64((__m128i *)out, src_r0); 720aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani _mm_storel_epi64((__m128i *)(out + out_wid), src_r1); 721aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 722aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani // Row 4 and 5 723aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama 
Avadhani ref_p0 += 2*ref_wid; 724aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani ref_p1 += 2*ref_wid; 725aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani out += 2*out_wid; 726aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 727aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r0 = _mm_loadl_epi64((__m128i *) (ref_p0)); //Row 4 728aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r0_1 = _mm_loadl_epi64((__m128i *) (ref_p1)); 729aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r1 = _mm_loadl_epi64((__m128i *) (ref_p0 + ref_wid)); //Row 5 730aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r1_1 = _mm_loadl_epi64((__m128i *) (ref_p1 + ref_wid)); 731aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 732aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r0 = _mm_avg_epu8(src_r0, src_r0_1); 733aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r1 = _mm_avg_epu8(src_r1, src_r1_1); 734aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 735aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani _mm_storel_epi64((__m128i *)out, src_r0); 736aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani _mm_storel_epi64((__m128i *)(out + out_wid), src_r1); 737aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 738aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani // Row 6 and 7 739aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani ref_p0 += 2*ref_wid; 740aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani ref_p1 += 2*ref_wid; 741aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani out += 2*out_wid; 742aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 743aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r0 = _mm_loadl_epi64((__m128i *) (ref_p0)); //Row 6 744aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r0_1 = _mm_loadl_epi64((__m128i *) 
(ref_p1)); 745aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r1 = _mm_loadl_epi64((__m128i *) (ref_p0 + ref_wid)); //Row 7 746aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r1_1 = _mm_loadl_epi64((__m128i *) (ref_p1 + ref_wid)); 747aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 748aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r0 = _mm_avg_epu8(src_r0, src_r0_1); 749aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r1 = _mm_avg_epu8(src_r1, src_r1_1); 750aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 751aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani _mm_storel_epi64((__m128i *)out, src_r0); 752aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani _mm_storel_epi64((__m128i *)(out + out_wid), src_r1); 753aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 754aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani return; 755aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani} 756aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 757aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 758aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/*****************************************************************************/ 759aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* */ 760aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* Function Name : impeg2_mc_fullx_halfy_8x8_sse42() */ 761aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* */ 762aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* Description : Gets the buffer from (0,0.5) to (8,8.5) */ 763aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* and the above block of size 8 x 8 will be placed as a */ 764aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* block from the current position of out_buf */ 765aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* */ 
766aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* Inputs : ref - Reference frame from which the block will be */ 767aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* block will be extracted. */ 768aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* ref_wid - WIdth of reference frame */ 769aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* out_wid - WIdth of the output frame */ 770aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* blk_width - width of the block */ 771aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* blk_width - height of the block */ 772aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* */ 773aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* Globals : None */ 774aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* */ 775aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* Processing : Point to the (0,0) and (0,1) position in the ref frame */ 776aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* Interpolate these two values to get the value at(0,0.5) */ 777aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* Repeat this to get an 8 x 8 block using 8 x 9 block from */ 778aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* reference frame */ 779aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* */ 780aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* Outputs : out - Output containing the extracted block */ 781aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* */ 782aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* Returns : None */ 783aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* */ 784aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* Issues : None */ 785aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* */ 786aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama 
Avadhani/*****************************************************************************/ 787aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhanivoid impeg2_mc_fullx_halfy_8x8_sse42(UWORD8 *out, 788aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani UWORD8 *ref, 789aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani UWORD32 ref_wid, 790aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani UWORD32 out_wid) 791aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani{ 792aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani __m128i src_r0, src_r1, src_r2, temp0, temp1; 793aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani /* P0-P3 are the pixels in the reference frame and Q is the value being */ 794aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani /* estimated */ 795aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani /* 796aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani P0 797aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani x 798aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani P1 799aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani */ 800aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r0 = _mm_loadl_epi64((__m128i *)ref); //Row 0 801aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r1 = _mm_loadl_epi64((__m128i *)(ref + ref_wid)); //Row 1 802aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r2 = _mm_loadl_epi64((__m128i *)(ref + 2 * ref_wid)); //Row 2 803aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani temp0 = _mm_avg_epu8(src_r0, src_r1); 804aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani temp1 = _mm_avg_epu8(src_r1, src_r2); 805aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani _mm_storel_epi64((__m128i *)out, temp0); //Row 0 806aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani _mm_storel_epi64((__m128i *)(out + out_wid), temp1); //Row 1 
807aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 808aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani ref+= 3*ref_wid; 809aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani out+= 2*out_wid; 810aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 811aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r0 = _mm_loadl_epi64((__m128i *)ref); //Row 3 812aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r1 = _mm_loadl_epi64((__m128i *)(ref + ref_wid)); //Row 4 813aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani temp0 = _mm_avg_epu8(src_r2, src_r0); 814aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani temp1 = _mm_avg_epu8(src_r0, src_r1); 815aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani _mm_storel_epi64((__m128i *)out, temp0); //Row 2 816aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani _mm_storel_epi64((__m128i *)(out + out_wid), temp1); //Row 3 817aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 818aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani ref += 2*ref_wid; 819aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani out+= 2*out_wid; 820aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 821aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r2 = _mm_loadl_epi64((__m128i *)ref); //Row 5 822aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r0 = _mm_loadl_epi64((__m128i *)(ref + ref_wid)); //Row 6 823aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani temp0 = _mm_avg_epu8(src_r1, src_r2); 824aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani temp1 = _mm_avg_epu8(src_r2, src_r0); 825aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani _mm_storel_epi64((__m128i *)out, temp0); //Row 4 826aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani _mm_storel_epi64((__m128i *)(out + out_wid), temp1); //Row 5 827aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama 
Avadhani 828aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani ref += 2*ref_wid; 829aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani out+= 2*out_wid; 830aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 831aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r1 = _mm_loadl_epi64((__m128i *)ref); //Row 7 832aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r2 = _mm_loadl_epi64((__m128i *) (ref + ref_wid)); //Row 8 833aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani temp0 = _mm_avg_epu8(src_r0, src_r1); 834aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani temp1 = _mm_avg_epu8(src_r1, src_r2); 835aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani _mm_storel_epi64((__m128i *)out, temp0); //Row 6 836aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani _mm_storel_epi64((__m128i *)(out + out_wid), temp1); //Row 7 837aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 838aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani return; 839aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani} 840aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 841aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/*****************************************************************************/ 842aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* */ 843aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* Function Name : impeg2_mc_fullx_fully_8x8_sse42() */ 844aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* */ 845aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* Description : Gets the buffer from (x,y) to (x+8,y+8) */ 846aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* and the above block of size 8 x 8 will be placed as a */ 847aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* block from the current position of out_buf */ 848aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama 
Avadhani/* */ 849aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* Inputs : ref - Reference frame from which the block will be */ 850aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* block will be extracted. */ 851aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* ref_wid - WIdth of reference frame */ 852aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* out_wid - WIdth of the output frame */ 853aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* blk_width - width of the block */ 854aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* blk_width - height of the block */ 855aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* */ 856aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* Globals : None */ 857aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* */ 858aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* Processing : Point to the (0,0) position in the ref frame */ 859aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* Get an 8 x 8 block from reference frame */ 860aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* */ 861aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* Outputs : out - Output containing the extracted block */ 862aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* */ 863aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* Returns : None */ 864aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* */ 865aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* Issues : None */ 866aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/* */ 867aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani/*****************************************************************************/ 868aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhanivoid impeg2_mc_fullx_fully_8x8_sse42(UWORD8 *out, 869aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani UWORD8 
*ref, 870aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani UWORD32 ref_wid, 871aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani UWORD32 out_wid) 872aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani{ 873aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani __m128i src_r0, src_r1, src_r2, src_r3; 874aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani // Row 0-3 875aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r0 = _mm_loadl_epi64((__m128i *)ref); 876aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r1 = _mm_loadl_epi64((__m128i *)(ref + ref_wid)); 877aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r2 = _mm_loadl_epi64((__m128i *)(ref + 2 * ref_wid)); 878aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r3 = _mm_loadl_epi64((__m128i *)(ref + 3 * ref_wid)); 879aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 880aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani _mm_storel_epi64((__m128i *)out, src_r0); 881aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani _mm_storel_epi64((__m128i *)(out + out_wid), src_r1); 882aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani _mm_storel_epi64((__m128i *)(out + 2 * out_wid), src_r2); 883aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani _mm_storel_epi64((__m128i *)(out + 3 * out_wid), src_r3); 884aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 885aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani // Row 4-7 886aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani ref += 4 * ref_wid; 887aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani out += 4 * out_wid; 888aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 889aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r0 = _mm_loadl_epi64((__m128i *)ref); 890aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r1 = _mm_loadl_epi64((__m128i *)(ref + 
ref_wid)); 891aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r2 = _mm_loadl_epi64((__m128i *)(ref + 2 * ref_wid)); 892aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani src_r3 = _mm_loadl_epi64((__m128i *)(ref + 3 * ref_wid)); 893aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani 894aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani _mm_storel_epi64((__m128i *)out, src_r0); 895aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani _mm_storel_epi64((__m128i *)(out + out_wid), src_r1); 896aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani _mm_storel_epi64((__m128i *)(out + 2 * out_wid), src_r2); 897aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani _mm_storel_epi64((__m128i *)(out + 3 * out_wid), src_r3); 898aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani return; 899aed24eee7ddfc93f1436b0c1679431bd286879b4Venkatarama Avadhani} 900