1/****************************************************************************** 2 * 3 * Copyright (C) 2015 The Android Open Source Project 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at: 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 * 17 ***************************************************************************** 18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore 19*/ 20/** 21******************************************************************************* 22* @file 23* ideint_cac_ssse3.c 24* 25* @brief 26* This file include the definitions of the combing artifact check function 27* of the de-interlacer and some variant of that. 28* 29* @author 30* Ittiam 31* 32* @par List of Functions: 33* cac_4x8() 34* ideint_cac() 35* 36* @remarks 37* In the de-interlacer workspace, cac is not a seperate assembly module as 38* it comes along with the de_int_decision() function. But in C-Model, to 39* keep the things cleaner, it was made to be a separate function during 40* cac experiments long after the assembly was written by Mudit. 41* 42******************************************************************************* 43*/ 44/*****************************************************************************/ 45/* File Includes */ 46/*****************************************************************************/ 47/* System include files */ 48#include <stdio.h> 49#include <stdint.h> 50#include <string.h> 51#include <stdlib.h> 52#include <immintrin.h> 53 54/* User include files */ 55#include "icv_datatypes.h" 56#include "icv_macros.h" 57#include "icv.h" 58#include "icv_variance.h" 59#include "icv_sad.h" 60#include "ideint.h" 61#include "ideint_defs.h" 62#include "ideint_structs.h" 63#include "ideint_cac.h" 64 65/** 66******************************************************************************* 67* 68* @brief 69* Combing artifact check function for 8x8 block 70* 71* @par Description 72* Determines CAC for 8x8 block by calling 8x4 CAC function 73* 74* @param[in] pu1_top 75* Top field 76* 77* @param[in] pu1_bot 78* Bottom field 79* 80* @param[in] top_strd 81* Top field Stride 82* 83* @param[in] bot_strd 84* Bottom field stride 85* 86* @returns 87* combing artifact flag (1 = detected, 0 = not detected) 88* 89* @remarks 90* 91******************************************************************************* 92*/ 93WORD32 ideint_cac_8x8_ssse3(UWORD8 *pu1_top, 94 UWORD8 *pu1_bot, 95 WORD32 top_strd, 96 WORD32 bot_strd) 97{ 98 WORD32 ca; /* combing artifact result */ 99 WORD32 i; 100 WORD32 adj[2] = {0}; 101 WORD32 alt[2] = {0}; 102 WORD32 sum_1, sum_2, sum_3, sum_4; 103 WORD32 sum_diff, diff_sum; 104 105 __m128i top[4]; 106 __m128i bot[4]; 107 __m128i sum_t[4]; 108 __m128i sum_b[4]; 109 __m128i zero; 110 111 112 zero = _mm_setzero_si128(); 113 114 for(i = 0; i < 4; i++) 115 { 116 /* Load top */ 117 top[i] = (__m128i)_mm_loadl_epi64((__m128i *) (pu1_top)); 118 pu1_top += top_strd; 119 120 /* Load bottom */ 121 bot[i] = (__m128i)_mm_loadl_epi64((__m128i *) (pu1_bot)); 122 pu1_bot += bot_strd; 123 124 /* Unpack */ 125 top[i] = _mm_unpacklo_epi8(top[i], zero); 126 bot[i] = _mm_unpacklo_epi8(bot[i], zero); 127 128 /* Compute row sums */ 129 sum_t[i] = _mm_sad_epu8(top[i], zero); 130 sum_b[i] = _mm_sad_epu8(bot[i], zero); 131 } 132 133 /* Compute row based alt and adj */ 134 for(i = 0; i < 4; i += 2) 135 { 136 sum_1 = _mm_cvtsi128_si32(sum_t[i + 0]); 137 sum_2 = _mm_cvtsi128_si32(sum_b[i + 0]); 138 sum_diff = ABS_DIF(sum_1, sum_2); 139 if(sum_diff >= RSUM_CSUM_THRESH) 140 adj[0] += sum_diff; 141 142 sum_3 = _mm_cvtsi128_si32(sum_t[i + 1]); 143 sum_4 = _mm_cvtsi128_si32(sum_b[i + 1]); 144 sum_diff = ABS_DIF(sum_3, sum_4); 145 if(sum_diff >= RSUM_CSUM_THRESH) 146 adj[0] += sum_diff; 147 148 alt[0] += ABS_DIF(sum_1, sum_3); 149 alt[0] += ABS_DIF(sum_2, sum_4); 150 151 sum_1 = _mm_cvtsi128_si32(_mm_srli_si128(sum_t[i + 0], 8)); 152 sum_2 = _mm_cvtsi128_si32(_mm_srli_si128(sum_b[i + 0], 8)); 153 sum_diff = ABS_DIF(sum_1, sum_2); 154 if(sum_diff >= RSUM_CSUM_THRESH) 155 adj[1] += sum_diff; 156 157 sum_3 = _mm_cvtsi128_si32(_mm_srli_si128(sum_t[i + 1], 8)); 158 sum_4 = _mm_cvtsi128_si32(_mm_srli_si128(sum_b[i + 1], 8)); 159 sum_diff = ABS_DIF(sum_3, sum_4); 160 if(sum_diff >= RSUM_CSUM_THRESH) 161 adj[1] += sum_diff; 162 163 alt[1] += ABS_DIF(sum_1, sum_3); 164 alt[1] += ABS_DIF(sum_2, sum_4); 165 } 166 167 /* Compute column based adj */ 168 { 169 __m128i avg1, avg2; 170 __m128i top_avg, bot_avg; 171 __m128i min, max, diff, thresh; 172 __m128i mask; 173 avg1 = _mm_avg_epu8(top[0], top[1]); 174 avg2 = _mm_avg_epu8(top[2], top[3]); 175 top_avg = _mm_avg_epu8(avg1, avg2); 176 177 avg1 = _mm_avg_epu8(bot[0], bot[1]); 178 avg2 = _mm_avg_epu8(bot[2], bot[3]); 179 bot_avg = _mm_avg_epu8(avg1, avg2); 180 181 min = _mm_min_epu8(top_avg, bot_avg); 182 max = _mm_max_epu8(top_avg, bot_avg); 183 184 diff = _mm_sub_epi16(max, min); 185 thresh = _mm_set1_epi16((RSUM_CSUM_THRESH >> 2) - 1); 186 187 mask = _mm_cmpgt_epi16(diff, thresh); 188 diff = _mm_and_si128(diff, mask); 189 190 diff_sum = _mm_extract_epi16(diff, 0); 191 diff_sum += _mm_extract_epi16(diff, 1); 192 diff_sum += _mm_extract_epi16(diff, 2); 193 diff_sum += _mm_extract_epi16(diff, 3); 194 195 adj[0] += diff_sum << 2; 196 197 diff_sum = _mm_extract_epi16(diff, 4); 198 diff_sum += _mm_extract_epi16(diff, 5); 199 diff_sum += _mm_extract_epi16(diff, 6); 200 diff_sum += _mm_extract_epi16(diff, 7); 201 202 adj[1] += diff_sum << 2; 203 204 } 205 206 /* Compute column based alt */ 207 { 208 __m128i avg1, avg2; 209 __m128i even_avg, odd_avg, diff; 210 avg1 = _mm_avg_epu8(top[0], bot[0]); 211 avg2 = _mm_avg_epu8(top[2], bot[2]); 212 even_avg = _mm_avg_epu8(avg1, avg2); 213 214 avg1 = _mm_avg_epu8(top[1], bot[1]); 215 avg2 = _mm_avg_epu8(top[3], bot[3]); 216 odd_avg = _mm_avg_epu8(avg1, avg2); 217 218 diff = _mm_sad_epu8(even_avg, odd_avg); 219 220 221 diff_sum = _mm_cvtsi128_si32(diff); 222 alt[0] += diff_sum << 2; 223 224 diff_sum = _mm_cvtsi128_si32(_mm_srli_si128(diff, 8)); 225 alt[1] += diff_sum << 2; 226 227 } 228 alt[0] += (alt[0] >> SAD_BIAS_MULT_SHIFT) + (SAD_BIAS_ADDITIVE >> 1); 229 alt[1] += (alt[1] >> SAD_BIAS_MULT_SHIFT) + (SAD_BIAS_ADDITIVE >> 1); 230 231 ca = (alt[0] < adj[0]); 232 ca |= (alt[1] < adj[1]); 233 234 return ca; 235} 236 237