1/******************************************************************************
2 *
3 * Copyright (C) 2015 The Android Open Source Project
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 *****************************************************************************
18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19*/
20/**
21*******************************************************************************
22* @file
23*  ideint_cac_ssse3.c
24*
25* @brief
*  This file includes the definition of the combing artifact check function
* of the de-interlacer and some variants of it.
28*
29* @author
30*  Ittiam
31*
32* @par List of Functions:
*  ideint_cac_8x8_ssse3()
35*
36* @remarks
*  In the de-interlacer workspace, cac is not a separate assembly module as
* it comes along with the de_int_decision() function. But in the C-Model, to
* keep things cleaner, it was made a separate function during the cac
* experiments, long after the assembly was written by Mudit.
41*
42*******************************************************************************
43*/
44/*****************************************************************************/
45/* File Includes                                                             */
46/*****************************************************************************/
47/* System include files */
48#include <stdio.h>
49#include <stdint.h>
50#include <string.h>
51#include <stdlib.h>
52#include <immintrin.h>
53
54/* User include files */
55#include "icv_datatypes.h"
56#include "icv_macros.h"
57#include "icv.h"
58#include "icv_variance.h"
59#include "icv_sad.h"
60#include "ideint.h"
61#include "ideint_defs.h"
62#include "ideint_structs.h"
63#include "ideint_cac.h"
64
65/**
66*******************************************************************************
67*
68* @brief
69* Combing artifact check function for 8x8 block
70*
71* @par   Description
72* Determines CAC for 8x8 block by calling 8x4 CAC function
73*
74* @param[in] pu1_top
75*  Top field
76*
77* @param[in] pu1_bot
78*  Bottom field
79*
80* @param[in] top_strd
81*  Top field Stride
82*
83* @param[in] bot_strd
84*  Bottom field stride
85*
86* @returns
87* combing artifact flag (1 = detected, 0 = not detected)
88*
89* @remarks
90*
91*******************************************************************************
92*/
93WORD32 ideint_cac_8x8_ssse3(UWORD8 *pu1_top,
94                            UWORD8 *pu1_bot,
95                            WORD32 top_strd,
96                            WORD32 bot_strd)
97{
98    WORD32 ca;        /* combing artifact result                          */
99    WORD32 i;
100    WORD32 adj[2] = {0};
101    WORD32 alt[2] = {0};
102    WORD32 sum_1, sum_2, sum_3, sum_4;
103    WORD32 sum_diff, diff_sum;
104
105    __m128i top[4];
106    __m128i bot[4];
107    __m128i sum_t[4];
108    __m128i sum_b[4];
109    __m128i zero;
110
111
112    zero = _mm_setzero_si128();
113
114    for(i = 0; i < 4; i++)
115    {
116        /* Load top */
117        top[i] = (__m128i)_mm_loadl_epi64((__m128i *) (pu1_top));
118        pu1_top += top_strd;
119
120        /* Load bottom */
121        bot[i] = (__m128i)_mm_loadl_epi64((__m128i *) (pu1_bot));
122        pu1_bot += bot_strd;
123
124        /* Unpack */
125        top[i] = _mm_unpacklo_epi8(top[i], zero);
126        bot[i] = _mm_unpacklo_epi8(bot[i], zero);
127
128        /* Compute row sums */
129        sum_t[i]  = _mm_sad_epu8(top[i], zero);
130        sum_b[i]  = _mm_sad_epu8(bot[i], zero);
131    }
132
133    /* Compute row based alt and adj */
134    for(i = 0; i < 4; i += 2)
135    {
136        sum_1 = _mm_cvtsi128_si32(sum_t[i + 0]);
137        sum_2 = _mm_cvtsi128_si32(sum_b[i + 0]);
138        sum_diff = ABS_DIF(sum_1, sum_2);
139        if(sum_diff >= RSUM_CSUM_THRESH)
140            adj[0] += sum_diff;
141
142        sum_3 = _mm_cvtsi128_si32(sum_t[i + 1]);
143        sum_4 = _mm_cvtsi128_si32(sum_b[i + 1]);
144        sum_diff = ABS_DIF(sum_3, sum_4);
145        if(sum_diff >= RSUM_CSUM_THRESH)
146            adj[0] += sum_diff;
147
148        alt[0] += ABS_DIF(sum_1, sum_3);
149        alt[0] += ABS_DIF(sum_2, sum_4);
150
151        sum_1 = _mm_cvtsi128_si32(_mm_srli_si128(sum_t[i + 0], 8));
152        sum_2 = _mm_cvtsi128_si32(_mm_srli_si128(sum_b[i + 0], 8));
153        sum_diff = ABS_DIF(sum_1, sum_2);
154        if(sum_diff >= RSUM_CSUM_THRESH)
155            adj[1] += sum_diff;
156
157        sum_3 = _mm_cvtsi128_si32(_mm_srli_si128(sum_t[i + 1], 8));
158        sum_4 = _mm_cvtsi128_si32(_mm_srli_si128(sum_b[i + 1], 8));
159        sum_diff = ABS_DIF(sum_3, sum_4);
160        if(sum_diff >= RSUM_CSUM_THRESH)
161            adj[1] += sum_diff;
162
163        alt[1] += ABS_DIF(sum_1, sum_3);
164        alt[1] += ABS_DIF(sum_2, sum_4);
165    }
166
167    /* Compute column based adj */
168    {
169        __m128i avg1, avg2;
170        __m128i top_avg, bot_avg;
171        __m128i min, max, diff, thresh;
172        __m128i mask;
173        avg1 = _mm_avg_epu8(top[0], top[1]);
174        avg2 = _mm_avg_epu8(top[2], top[3]);
175        top_avg = _mm_avg_epu8(avg1, avg2);
176
177        avg1 = _mm_avg_epu8(bot[0], bot[1]);
178        avg2 = _mm_avg_epu8(bot[2], bot[3]);
179        bot_avg = _mm_avg_epu8(avg1, avg2);
180
181        min = _mm_min_epu8(top_avg, bot_avg);
182        max = _mm_max_epu8(top_avg, bot_avg);
183
184        diff = _mm_sub_epi16(max, min);
185        thresh = _mm_set1_epi16((RSUM_CSUM_THRESH >> 2) - 1);
186
187        mask = _mm_cmpgt_epi16(diff, thresh);
188        diff = _mm_and_si128(diff, mask);
189
190        diff_sum = _mm_extract_epi16(diff, 0);
191        diff_sum += _mm_extract_epi16(diff, 1);
192        diff_sum += _mm_extract_epi16(diff, 2);
193        diff_sum += _mm_extract_epi16(diff, 3);
194
195        adj[0] += diff_sum << 2;
196
197        diff_sum = _mm_extract_epi16(diff, 4);
198        diff_sum += _mm_extract_epi16(diff, 5);
199        diff_sum += _mm_extract_epi16(diff, 6);
200        diff_sum += _mm_extract_epi16(diff, 7);
201
202        adj[1] += diff_sum << 2;
203
204    }
205
206    /* Compute column based alt */
207    {
208        __m128i avg1, avg2;
209        __m128i even_avg, odd_avg, diff;
210        avg1 = _mm_avg_epu8(top[0], bot[0]);
211        avg2 = _mm_avg_epu8(top[2], bot[2]);
212        even_avg = _mm_avg_epu8(avg1, avg2);
213
214        avg1 = _mm_avg_epu8(top[1], bot[1]);
215        avg2 = _mm_avg_epu8(top[3], bot[3]);
216        odd_avg = _mm_avg_epu8(avg1, avg2);
217
218        diff = _mm_sad_epu8(even_avg, odd_avg);
219
220
221        diff_sum = _mm_cvtsi128_si32(diff);
222        alt[0] += diff_sum << 2;
223
224        diff_sum = _mm_cvtsi128_si32(_mm_srli_si128(diff, 8));
225        alt[1] += diff_sum << 2;
226
227    }
228    alt[0] += (alt[0] >> SAD_BIAS_MULT_SHIFT) + (SAD_BIAS_ADDITIVE >> 1);
229    alt[1] += (alt[1] >> SAD_BIAS_MULT_SHIFT) + (SAD_BIAS_ADDITIVE >> 1);
230
231    ca    = (alt[0] < adj[0]);
232    ca   |= (alt[1] < adj[1]);
233
234    return ca;
235}
236
237