1/****************************************************************************** 2 * 3 * Copyright (C) 2015 The Android Open Source Project 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at: 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 * 17 ***************************************************************************** 18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore 19*/ 20/** 21******************************************************************************* 22* @file 23* icv_sad.c 24* 25* @brief 26* This file contains the functions to compute SAD 27* 28* @author 29* Ittiam 30* 31* @par List of Functions: 32* icv_sad_8x4_ssse3() 33* 34* @remarks 35* None 36* 37******************************************************************************* 38*/ 39/*****************************************************************************/ 40/* File Includes */ 41/*****************************************************************************/ 42/* System include files */ 43#include <stdio.h> 44#include <stdint.h> 45#include <string.h> 46#include <stdlib.h> 47#include <assert.h> 48#include <immintrin.h> 49 50/* User include files */ 51#include "icv_datatypes.h" 52#include "icv_macros.h" 53#include "icv_platform_macros.h" 54#include "icv.h" 55 56/** 57******************************************************************************* 58* 59* @brief 60* Compute 8x4 SAD 61* 62* @par Description 63* Compute 8x4 sum of absolute differences between source and reference block 64* 65* @param[in] pu1_src 66* Source buffer 67* 68* @param[in] pu1_ref 69* Reference buffer 70* 71* @param[in] src_strd 72* Source stride 73* 74* @param[in] ref_strd 75* Reference stride 76* 77* @param[in] wd 78* Assumed to be 8 79* 80* @param[in] ht 81* Assumed to be 4 82 83* @returns 84* SAD 85* 86* @remarks 87* 88******************************************************************************* 89*/ 90WORD32 icv_sad_8x4_ssse3(UWORD8 *pu1_src, 91 UWORD8 *pu1_ref, 92 WORD32 src_strd, 93 WORD32 ref_strd, 94 WORD32 wd, 95 WORD32 ht) 96{ 97 WORD32 sad; 98 __m128 src_r0, src_r1; 99 __m128 ref_r0, ref_r1; 100 __m128i res_r0, res_r1; 101 102 UNUSED(wd); 103 UNUSED(ht); 104 ASSERT(wd == 8); 105 ASSERT(ht == 4); 106 107 /* Load source */ 108 src_r0 = (__m128)_mm_loadl_epi64((__m128i *) (pu1_src)); 109 pu1_src += src_strd; 110 111 src_r1 = (__m128)_mm_loadl_epi64((__m128i *) (pu1_src)); 112 pu1_src += src_strd; 113 114 src_r0 = _mm_loadh_pi (src_r0, (__m64 *) (pu1_src)); 115 pu1_src += src_strd; 116 117 src_r1 = _mm_loadh_pi (src_r1, (__m64 *) (pu1_src)); 118 pu1_src += src_strd; 119 120 121 /* Load reference */ 122 ref_r0 = (__m128)_mm_loadl_epi64((__m128i *) (pu1_ref)); 123 pu1_ref += ref_strd; 124 125 ref_r1 = (__m128)_mm_loadl_epi64((__m128i *) (pu1_ref)); 126 pu1_ref += ref_strd; 127 128 ref_r0 = _mm_loadh_pi (ref_r0, (__m64 *) (pu1_ref)); 129 pu1_ref += ref_strd; 130 131 ref_r1 = _mm_loadh_pi (ref_r1, (__m64 *) (pu1_ref)); 132 pu1_ref += ref_strd; 133 134 /* Compute SAD for each row */ 135 res_r0 = _mm_sad_epu8((__m128i)src_r0, (__m128i)ref_r0); 136 res_r1 = _mm_sad_epu8((__m128i)src_r1, (__m128i)ref_r1); 137 138 /* Accumulate SAD */ 139 res_r0 = _mm_add_epi64(res_r0, res_r1); 140 res_r0 = _mm_add_epi64(res_r0, _mm_srli_si128(res_r0, 8)); 141 142 sad = _mm_cvtsi128_si32(res_r0); 143 144 return sad; 145} 146