//******************************************************************************
//*
//* Copyright (C) 2015 The Android Open Source Project
//*
//* Licensed under the Apache License, Version 2.0 (the "License");
//* you may not use this file except in compliance with the License.
//* You may obtain a copy of the License at:
//*
//* http://www.apache.org/licenses/LICENSE-2.0
//*
//* Unless required by applicable law or agreed to in writing, software
//* distributed under the License is distributed on an "AS IS" BASIS,
//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//* See the License for the specific language governing permissions and
//* limitations under the License.
//*
//*****************************************************************************
//* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
//*/

//******************************************************************************
//*
//* @brief
//*  This file contains definitions of routines for spatial filter
//*
//* @author
//*  Ittiam
//*
//* @par List of Functions:
//*  - ideint_spatial_filter_av8()
//*
//* @remarks
//*  None
//*
//*******************************************************************************


//******************************************************************************
//*
//*  @brief Performs spatial filtering
//*
//*  @par   Description
//*   This function performs edge adaptive spatial filtering on an 8x8 block
//*
//* @param[in] pu1_src
//*  UWORD8 pointer to the source
//*
//* @param[in] pu1_out
//*  UWORD8 pointer to the destination
//*
//* @param[in] src_strd
//*  source stride
//*
//* @param[in] out_strd
//*  destination stride
//*
//* @returns
//*     None
//*
//* @remarks
//*
//******************************************************************************

    .global ideint_spatial_filter_av8

//------------------------------------------------------------------------------
// ideint_spatial_filter_av8
//
// Edge-adaptive spatial interpolation of one 8-pixel-wide block.
//
// Register interface (AAPCS64, leaf routine, no stack use):
//   x0 : pu1_src  - address of column 0 of the first source row
//   x1 : pu1_out  - address of the first output row
//   w2 : src_strd - byte distance between consecutive source rows
//   w3 : out_strd - byte distance between consecutive output rows
//
// NOTE(review): the strides are assumed to be declared as 32-bit signed
// integers in the C prototype (not visible from this file - confirm). Under
// AAPCS64 the upper 32 bits of x2/x3 are then unspecified, so both are
// sign-extended on entry before being used as 64-bit address offsets.
//
// Operation:
//   1. For 4 consecutive row pairs (5 source rows) accumulate, per column i,
//        adiff[0] += |row_1[i]     - row_2[i]    |
//        adiff[1] += |row_1[i - 1] - row_2[i + 1]|
//        adiff[2] += |row_1[i + 1] - row_2[i - 1]|
//   2. Sum each accumulator over columns 0..3 and over columns 4..7, then
//      weight adiff[0] by 5 (EDGE_BIAS_0) and adiff[1], adiff[2] by 7
//      (EDGE_BIAS_1).
//   3. For each 4-column half select
//        shift = (adiff[2] <= adiff[1]) ? ((adiff[2] <= adiff[0]) ?  1 : 0)
//                                       : ((adiff[1] <= adiff[0]) ? -1 : 0)
//   4. Store 4 output rows of 8 bytes; output row k, column i is the rounding
//      average of src_row_k[i + shift] and src_row_(k+1)[i - shift], using the
//      shift selected for the half that column i belongs to.
//
// Memory touched: reads source bytes at columns -1..8 of 5 rows,
//                 writes 8 bytes on each of 4 output rows.
// Clobbers:       x0-x11, v0-v5, v16, v18, v20, v30, v31, NZCV
//------------------------------------------------------------------------------
ideint_spatial_filter_av8:

    // Widen the 32-bit stride arguments before they are used as post-index
    // offsets (see NOTE(review) above)
    sxtw            x2,     w2
    sxtw            x3,     w3

    // Clear the three absolute-difference accumulators
    movi            v16.8h, #0                  // adiff[0] : vertical
    movi            v18.8h, #0                  // adiff[1] : [i - 1] vs [i + 1]
    movi            v20.8h, #0                  // adiff[2] : [i + 1] vs [i - 1]

    // Keep the block origin; x0 is advanced by the loads below
    mov             x10,    x0

    // Load &pu1_row_1[0], &pu1_row_1[-1] and &pu1_row_1[1]
    sub             x5,     x0,     #1
    ld1             {v0.8b},    [x0],   x2
    ld1             {v1.8b},    [x5]
    add             x5,     x5,     #2
    ld1             {v2.8b},    [x5]

    // Number of row pairs to examine
    mov             x4,     #4

    // EDGE_BIAS_0
    movi            v30.2s, #5

    // EDGE_BIAS_1
    movi            v31.2s, #7

.Lideint_sf_detect_edge:
    // Load &pu1_row_2[0], &pu1_row_2[-1] and &pu1_row_2[1]
    sub             x5,     x0,     #1
    ld1             {v3.8b},    [x0],   x2
    ld1             {v4.8b},    [x5]
    add             x5,     x5,     #2
    ld1             {v5.8b},    [x5]

    // Accumulate absolute differences
    // pu1_row_1[i] - pu1_row_2[i]
    uabal           v16.8h, v0.8b,  v3.8b

    // pu1_row_1[i - 1] - pu1_row_2[i + 1]
    uabal           v18.8h, v1.8b,  v5.8b

    // pu1_row_1[i + 1] - pu1_row_2[i - 1]
    uabal           v20.8h, v2.8b,  v4.8b

    // The current row_2 becomes row_1 of the next row pair
    mov             v0.8b,  v3.8b
    mov             v1.8b,  v4.8b
    mov             v2.8b,  v5.8b

    subs            x4,     x4,     #1
    b.gt            .Lideint_sf_detect_edge

    // Sum of absolute differences for each edge direction:
    // after addp + uaddlp, lane s[0] holds the total for columns 0..3 and
    // lane s[1] the total for columns 4..7
    addp            v16.8h, v16.8h, v16.8h
    addp            v18.8h, v18.8h, v18.8h
    addp            v20.8h, v20.8h, v20.8h

    uaddlp          v16.2s, v16.4h
    uaddlp          v18.2s, v18.4h
    uaddlp          v20.2s, v20.4h

    // adiff[0] *= EDGE_BIAS_0;
    mul             v16.2s, v16.2s, v30.2s

    // adiff[1] *= EDGE_BIAS_1;
    mul             v18.2s, v18.2s, v31.2s

    // adiff[2] *= EDGE_BIAS_1;
    mul             v20.2s, v20.2s, v31.2s

    // Compute shift for the first half of the block (columns 0..3) -> x8
    smov            x5,     v16.s[0]            // adiff[0]
    smov            x6,     v18.s[0]            // adiff[1]
    smov            x7,     v20.s[0]            // adiff[2]

    cmp             x7,     x5
    cset            x8,     le                  // (adiff[2] <= adiff[0]) ?  1 : 0
    cmp             x6,     x5
    csetm           x11,    le                  // (adiff[1] <= adiff[0]) ? -1 : 0
    cmp             x7,     x6
    csel            x8,     x8,     x11,    le  // adiff[2] <= adiff[1] picks the +1 candidate

    // Compute shift for the second half of the block (columns 4..7) -> x9
    smov            x5,     v16.s[1]            // adiff[0]
    smov            x6,     v18.s[1]            // adiff[1]
    smov            x7,     v20.s[1]            // adiff[2]

    cmp             x7,     x5
    cset            x9,     le                  // (adiff[2] <= adiff[0]) ?  1 : 0
    cmp             x6,     x5
    csetm           x11,    le                  // (adiff[1] <= adiff[0]) ? -1 : 0
    cmp             x7,     x6
    csel            x9,     x9,     x11,    le  // adiff[2] <= adiff[1] picks the +1 candidate

    // Source pointers for the interpolation:
    //   x4 = src + shift_1               (upper row, columns 0..3)
    //   x5 = src + src_strd - shift_1    (lower row, columns 0..3)
    //   x6 = src + 4 + shift_2           (upper row, columns 4..7)
    //   x7 = src + 4 + src_strd - shift_2 (lower row, columns 4..7)
    add             x4,     x10,    x8
    add             x5,     x10,    x2
    sub             x5,     x5,     x8

    add             x10,    x10,    #4
    add             x6,     x10,    x9
    add             x7,     x10,    x2
    sub             x7,     x7,     x9

    // Number of output rows
    mov             x8,     #4

.Lideint_sf_filter_loop:
    // Lane s[0] carries columns 0..3, lane s[1] carries columns 4..7
    ld1             {v0.s}[0],  [x4],   x2
    ld1             {v2.s}[0],  [x5],   x2

    ld1             {v0.s}[1],  [x6],   x2
    ld1             {v2.s}[1],  [x7],   x2

    // Rounding average of the upper and lower samples: (a + b + 1) >> 1
    urhadd          v4.8b,  v0.8b,  v2.8b
    st1             {v4.2s},    [x1],   x3

    subs            x8,     x8,     #1
    b.gt            .Lideint_sf_filter_loop

    ret