@/******************************************************************************
@ *
@ * Copyright (C) 2015 The Android Open Source Project
@ *
@ * Licensed under the Apache License, Version 2.0 (the "License");
@ * you may not use this file except in compliance with the License.
@ * You may obtain a copy of the License at:
@ *
@ * http://www.apache.org/licenses/LICENSE-2.0
@ *
@ * Unless required by applicable law or agreed to in writing, software
@ * distributed under the License is distributed on an "AS IS" BASIS,
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ * See the License for the specific language governing permissions and
@ * limitations under the License.
@ *
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/

@******************************************************************************
@*
@*  @brief
@*  This file contains the definition of the combing artifact check (CAC)
@*  routine
@*
@*  @author
@*  Ittiam
@*
@*  @par List of Functions:
@*  - ideint_cac_8x8_a9()
@*
@*  @remarks
@*  None
@*
@*******************************************************************************


@******************************************************************************
@*
@*  @brief Calculates Combing Artifact
@*
@*  @par   Description
@*  This function performs the combing artifact check (CAC) for two given
@*  fields. Four rows of eight bytes are read from each field. Two measures
@*  are accumulated separately for the left four columns and the right four
@*  columns:
@*    adj - differences between the top field and the bottom field
@*          (thresholded against RSUM_CSUM_THRESH)
@*    alt - differences between alternate rows of the same field, biased by
@*          alt += (alt >> SAD_BIAS_MULT_SHIFT) + (SAD_BIAS_ADDITIVE >> 1)
@*
@*  @param[in] pu1_top   (r0)
@*  UWORD8 pointer to top field
@*
@*  @param[in] pu1_bot   (r1)
@*  UWORD8 pointer to bottom field
@*
@*  @param[in] top_strd  (r2)
@*  Top field stride
@*
@*  @param[in] bot_strd  (r3)
@*  Bottom field stride
@*
@*  @returns
@*  r0 = 1 if the biased alt value is smaller than the adj value for at
@*  least one of the two column halves, 0 otherwise
@*
@*  @remarks
@*  Modifies r0, r1, the condition flags, d0-d7 and d16-d31 (d22/d23 are not
@*  touched). The callee-saved NEON registers d8-d15 are not used.
@*
@******************************************************************************

    .global ideint_cac_8x8_a9

ideint_cac_8x8_a9:

    @ r4-r10 are not referenced below; the save/restore pair is kept so the
    @ stack behaviour of the routine is unchanged
    stmfd       sp!,    {r4-r10, lr}

    @ Load first row of top
    vld1.u8     d28,    [r0],   r2

    @ Load first row of bottom
    vld1.u8     d29,    [r1],   r3

    @ Load second row of top
    vld1.u8     d30,    [r0],   r2

    @ Load second row of bottom
    vld1.u8     d31,    [r1],   r3


    @ Calculate row based adj and alt values
    @ Get row sums (each row is reduced to a left-half and a right-half sum)
    vpaddl.u8   q0,     q14

    vpaddl.u8   q1,     q15

    vpaddl.u16  q0,     q0

    vpaddl.u16  q1,     q1

    @ q0 holds the four 32 bit half-row sums of the first row (top, bottom)
    @ q1 holds the four 32 bit half-row sums of the second row (top, bottom)
    @ Pack q0 and q1 into a single register (sum does not exceed 16bits)

    vshl.u32    q8,     q1,     #16
    vorr.u32    q8,     q0,     q8
    @ q8 now contains 8 sums: d16 = top field, d17 = bottom field

    @ Load third row of top
    vld1.u8     d24,    [r0],   r2

    @ Load third row of bottom
    vld1.u8     d25,    [r1],   r3

    @ Load fourth row of top
    vld1.u8     d26,    [r0],   r2

    @ Load fourth row of bottom
    vld1.u8     d27,    [r1],   r3

    @ Get row sums
    vpaddl.u8   q2,     q12

    vpaddl.u8   q3,     q13

    vpaddl.u16  q2,     q2

    vpaddl.u16  q3,     q3
    @ q2 holds the four 32 bit half-row sums of the third row (top, bottom)
    @ q3 holds the four 32 bit half-row sums of the fourth row (top, bottom)
    @ Pack q2 and q3 into a single register (sum does not exceed 16bits)

    vshl.u32    q9,     q3,     #16
    vorr.u32    q9,     q2,     q9
    @ q9 now contains 8 sums: d18 = top field, d19 = bottom field

    @ Compute absolute diff between top and bottom row sums
    vabd.u16    d16,    d16,    d17
    vabd.u16    d17,    d18,    d19

    @ RSUM_CSUM_THRESH
    vmov.u16    q9,     #20

    @ Eliminate values smaller than RSUM_CSUM_THRESH
    vcge.u16    q10,    q8,     q9
    vand.u16    q10,    q8,     q10
    @ q10 now contains 8 absolute diff of sums above the threshold


    @ Compute adj
    vadd.u16    d20,    d20,    d21

    @ d20 has four adj values for two sub-blocks
    @ (lanes 0-1: left four columns, lanes 2-3: right four columns)

    @ Compute alt (difference between consecutive rows of the same field)
    vabd.u32    q0,     q0,     q1
    vabd.u32    q2,     q2,     q3

    vadd.u32    q0,     q0,     q2
    vadd.u32    d21,    d0,     d1
    @ d21 has two values for two sub-blocks (top + bottom field contribution)


    @ Calculate column based adj and alt values

    @ d0 = average of the four top rows, d1 = average of the four bottom rows
    vrhadd.u8   q0,     q14,    q15
    vrhadd.u8   q1,     q12,    q13
    vrhadd.u8   q0,     q0,     q1

    vabd.u8     d0,     d0,     d1

    @ RSUM_CSUM_THRESH >> 2
    @ d2 is used as scratch here: q1 is dead after the vrhadd above and d2 is
    @ rewritten before its next read. d8-d15 must be preserved for the caller
    @ (AAPCS), so they are deliberately avoided.
    vmov.u8     d2,     #5

    @ Eliminate values smaller than RSUM_CSUM_THRESH >> 2
    vcge.u8     d1,     d0,     d2
    vand.u8     d0,     d0,     d1
    @ d0 now contains 8 absolute diff of sums above the threshold


    vpaddl.u8   d0,     d0
    vshl.u16    d0,     d0,     #2

    @ Add row based adj
    vadd.u16    d20,    d0,     d20

    vpaddl.u16  d20,    d20
    @ d20 now contains 2 adj values


    @ d0 = average of first and third rows of both fields
    vrhadd.u8   d0,     d28,    d29
    vrhadd.u8   d2,     d24,    d25
    vrhadd.u8   d0,     d0,     d2

    @ d1 = average of second and fourth rows of both fields
    vrhadd.u8   d1,     d30,    d31
    vrhadd.u8   d3,     d26,    d27
    vrhadd.u8   d1,     d1,     d3

    vabd.u8     d0,     d0,     d1
    vpaddl.u8   d0,     d0

    vshl.u16    d0,     d0,     #2
    vpaddl.u16  d0,     d0
    vadd.u32    d21,    d0,     d21


    @ d21 now contains 2 alt values

    @ SAD_BIAS_MULT_SHIFT
    vshr.u32    d0,     d21,    #3
    vadd.u32    d21,    d21,    d0

    @ SAD_BIAS_ADDITIVE >> 1
    vmov.u32    d0,     #4
    vadd.u32    d21,    d21,    d0

    @ Each lane of d0 becomes all ones when alt < adj for that sub-block
    vclt.u32    d0,     d21,    d20
    @ Combine both lanes; the low word is non-zero if either lane was set
    vpaddl.u32  d0,     d0

    @ Normalise the result to 0 or 1 in r0
    vmov.u32    r0,     d0[0]
    cmp         r0,     #0
    movne       r0,     #1
    ldmfd       sp!,    {r4-r10, pc}