1/******************************************************************************
2 *
3 * Copyright (C) 2015 The Android Open Source Project
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 *****************************************************************************
18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19*/
20/**
21*******************************************************************************
22* @file
23*  icv_sad.c
24*
25* @brief
26*  This file contains the functions to compute SAD
27*
28* @author
29*  Ittiam
30*
31* @par List of Functions:
32*  icv_sad_8x4_ssse3()
33*
34* @remarks
35*  None
36*
37*******************************************************************************
38*/
39/*****************************************************************************/
40/* File Includes                                                             */
41/*****************************************************************************/
42/* System include files */
43#include <stdio.h>
44#include <stdint.h>
45#include <string.h>
46#include <stdlib.h>
47#include <assert.h>
48#include <immintrin.h>
49
50/* User include files */
51#include "icv_datatypes.h"
52#include "icv_macros.h"
53#include "icv_platform_macros.h"
54#include "icv.h"
55
56/**
57*******************************************************************************
58*
59* @brief
60*  Compute 8x4 SAD
61*
62* @par   Description
63*  Compute 8x4 sum of absolute differences between source and reference block
64*
65* @param[in] pu1_src
66*  Source buffer
67*
68* @param[in] pu1_ref
69*  Reference buffer
70*
71* @param[in] src_strd
72*  Source stride
73*
74* @param[in] ref_strd
75*  Reference stride
76*
77* @param[in] wd
78*  Assumed to be 8
79*
80* @param[in] ht
81*  Assumed to be 4
82
83* @returns
84*  SAD
85*
86* @remarks
87*
88*******************************************************************************
89*/
90WORD32 icv_sad_8x4_ssse3(UWORD8 *pu1_src,
91                         UWORD8 *pu1_ref,
92                         WORD32 src_strd,
93                         WORD32 ref_strd,
94                         WORD32 wd,
95                         WORD32 ht)
96{
97    WORD32 sad;
98    __m128 src_r0, src_r1;
99    __m128 ref_r0, ref_r1;
100    __m128i res_r0, res_r1;
101
102    UNUSED(wd);
103    UNUSED(ht);
104    ASSERT(wd == 8);
105    ASSERT(ht == 4);
106
107    /* Load source */
108    src_r0 = (__m128)_mm_loadl_epi64((__m128i *) (pu1_src));
109    pu1_src += src_strd;
110
111    src_r1 = (__m128)_mm_loadl_epi64((__m128i *) (pu1_src));
112    pu1_src += src_strd;
113
114    src_r0 = _mm_loadh_pi (src_r0, (__m64 *) (pu1_src));
115    pu1_src += src_strd;
116
117    src_r1 = _mm_loadh_pi (src_r1, (__m64 *) (pu1_src));
118    pu1_src += src_strd;
119
120
121    /* Load reference */
122    ref_r0 = (__m128)_mm_loadl_epi64((__m128i *) (pu1_ref));
123    pu1_ref += ref_strd;
124
125    ref_r1 = (__m128)_mm_loadl_epi64((__m128i *) (pu1_ref));
126    pu1_ref += ref_strd;
127
128    ref_r0 = _mm_loadh_pi (ref_r0, (__m64 *) (pu1_ref));
129    pu1_ref += ref_strd;
130
131    ref_r1 = _mm_loadh_pi (ref_r1, (__m64 *) (pu1_ref));
132    pu1_ref += ref_strd;
133
134    /* Compute SAD for each row */
135    res_r0 = _mm_sad_epu8((__m128i)src_r0, (__m128i)ref_r0);
136    res_r1 = _mm_sad_epu8((__m128i)src_r1, (__m128i)ref_r1);
137
138    /* Accumulate SAD */
139    res_r0 = _mm_add_epi64(res_r0,  res_r1);
140    res_r0 = _mm_add_epi64(res_r0, _mm_srli_si128(res_r0, 8));
141
142    sad  = _mm_cvtsi128_si32(res_r0);
143
144    return sad;
145}
146