;//
;// Copyright (C) 2007-2008 ARM Limited
;//
;// Licensed under the Apache License, Version 2.0 (the "License");
;// you may not use this file except in compliance with the License.
;// You may obtain a copy of the License at
;//
;//      http://www.apache.org/licenses/LICENSE-2.0
;//
;// Unless required by applicable law or agreed to in writing, software
;// distributed under the License is distributed on an "AS IS" BASIS,
;// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
;// See the License for the specific language governing permissions and
;// limitations under the License.
;//
;//
;//
;// File Name:  omxVCM4P10_FilterDeblockingChroma_HorEdge_I_s.s
;// OpenMAX DL: v1.0.2
;// Revision:   9641
;// Date:       Thursday, February 7, 2008
;//
;//
;//
;//

;//-----------------------------------------------------------------------
;// Syntax : ARM assembler (armasm) source; ';' starts a comment.
;// Target : built only when the ARM1136JS variant is selected (see the
;//          IF ARM1136JS guard below).  Uses the ARMv6 packed-byte SIMD
;//          instructions (USUB8, SEL, UHADD8, ...).
;//
;// omxVCM4P10_FilterDeblockingChroma_HorEdge_I
;//
;// In-place deblocking across horizontal edges, four pixels per step.
;// As written, the routine performs four steps:
;//   steps 1-2 : edge at pSrcDst,                 columns 0-3 and 4-7
;//   steps 3-4 : edge at pSrcDst + 4*srcdstStep,  columns 0-3 and 4-7
;// For each step the rows p1, p0 (above the edge) and q0, q1 (at and
;// below the edge) are read as 32-bit words; only the p0 and q0 rows
;// are written back, and only when at least one lane is filtered.
;//
;// In:   r0        = pSrcDst      pointer to the q0 row of the first edge
;//       r1        = srcdstStep   distance in bytes between pixel rows
;//       r2        = pAlpha       alpha[0] is used for the first edge,
;//                                alpha[1] for the second
;//       r3        = pBeta        beta[0] / beta[1], used like alpha
;//       stack[0]  = pThresholds  two bytes consumed per step (8 in total)
;//       stack[1]  = pBS          one byte per two pixels; halfwords are
;//                                read at byte offsets 0, 2, 8 and 10
;// Out:  r0 = OMX_Sts_NoErr (no argument validation is done here)
;//
;// NOTE(review): pixel rows are accessed with word loads/stores and pBS
;// with halfword loads; alignment requirements follow from that -
;// confirm against the callers.
;// NOTE(review): register save/restore is done by M_START/M_END from
;// armCOMM_s.h (not visible here).  The body uses r4-r12 and r14, so the
;// 'r11' argument of M_START presumably requests saving r4-r11 plus lr.
;//-----------------------------------------------------------------------


        INCLUDE omxtypes_s.h
        INCLUDE armCOMM_s.h

        M_VARIANTS ARM1136JS


        IF ARM1136JS

;// Per-lane constants.  Multiplying a byte value by MASK_1 replicates it
;// into all four byte lanes of a word.
MASK_0      EQU 0x00000000
MASK_1      EQU 0x01010101

;// Loop counter seed.  The counter is doubled (ADDS) once per step:
;//   carry set   -> end of the current edge (after the 2nd and 4th step)
;//   result zero -> all four steps done     (after the 4th step)
LOOP_COUNT  EQU 0x50000000

;// Declare input registers

pSrcDst     RN 0
srcdstStep  RN 1
pAlphaArg   RN 2
pBetaArg    RN 3

pThresholds RN 6
pBS         RN 9
pQ0         RN 0
bS          RN 10

alpha       RN 6
alpha0      RN 6
alpha1      RN 8

beta        RN 7
beta0       RN 7
beta1       RN 9

;// Declare Local/Temporary variables
;//
;// Many aliases share a physical register; the instruction order below
;// is arranged so that a value is dead before its register is reused.
;// Do not reorder instructions without re-checking these overlaps:
;//   r2  pAlphaArg / filt
;//   r3  pBetaArg  / p_0
;//   r5  p_1 / tC0 / pos / t5
;//   r6  pThresholds / alpha / P_0 / t4
;//   r7  beta / Q_0
;//   r8  alpha1 / q_0 / XY
;//   r9  pBS / beta1 / q_1 / neg
;//   r10 bS / a / t1
;//   r12 dp0q0 / dp1p0 / dq1q0 / tC / tC1 / t2
;//   r14 m00 / t3

;// Pixels
p_0     RN 3
p_1     RN 5
q_0     RN 8
q_1     RN 9

;// Filtering

dp0q0       RN 12
dp1p0       RN 12
dq1q0       RN 12

ap0q0       RN 4
filt        RN 2

m00         RN 14
m01         RN 11

pQ0         RN 0
Step        RN 1

;// Output

P_0         RN 6
Q_0         RN 7

;//Declarations for bSLT4 kernel

tC          RN 12
tC0         RN 5
tC1         RN 12
pos         RN 5
neg         RN 9

;//Declarations for bSGE4 kernel


;// Miscellaneous
XY          RN 8

a           RN 10
t1          RN 10
t2          RN 12
t3          RN 14
t4          RN 6
t5          RN 5


        ;// Allocate stack memory
        ;//   ppThresholds : running pThresholds pointer
        ;//   pAlphaBeta0  : replicated alpha/beta for the edge in progress
        ;//   pAlphaBeta1  : replicated alpha/beta for the second edge
        ;//   pXYBS        : loop counter XY
        ;//   ppBS         : running pBS pointer
        ;// NOTE(review): pXYBS is declared as 4 bytes but is accessed with
        ;// M_STRD/M_LDRD (two words: XY and pBS), while pBS is also written
        ;// through ppBS.  The loop only advances pBS correctly if ppBS is
        ;// laid out directly after pXYBS - confirm in armCOMM_s.h.
        M_ALLOC4 ppThresholds,4
        M_ALLOC8 pAlphaBeta0,8
        M_ALLOC8 pAlphaBeta1,8
        M_ALLOC8 pXYBS,4
        M_ALLOC4 ppBS,4

        ;// Function header
        M_START omxVCM4P10_FilterDeblockingChroma_HorEdge_I, r11

        ;//Input arguments on the stack
        M_ARG   ppThresholdsArg, 4
        M_ARG   ppBSArg, 4

        ;// Load alpha[0..1] and beta[0..1]; keep a private copy of the
        ;// pThresholds pointer before r6 is reused for alpha0.
        LDRB    alpha1, [pAlphaArg,#1]
        LDRB    beta1,  [pBetaArg,#1]
        M_LDR   pThresholds, ppThresholdsArg
        LDR     a,=MASK_1
        LDRB    beta0,  [pBetaArg]
        M_STR   pThresholds, ppThresholds
        LDRB    alpha0, [pAlphaArg]

        ;// Replicate each 8-bit limit into all four byte lanes
        MUL     alpha1, alpha1, a
        MUL     beta1, beta1, a
        MUL     alpha0, alpha0, a
        MUL     beta0, beta0, a

        ;// Save the limits; pBS is loaded only after beta1 (same register)
        ;// has been stored.  alpha0/beta0 also stay live in r6/r7.
        M_STRD  alpha1, beta1, pAlphaBeta1
        M_LDR   pBS, ppBSArg
        M_STRD  alpha0, beta0, pAlphaBeta0

        ;// Initialise the loop counter and save it together with pBS
        LDR     XY,=LOOP_COUNT
        M_STRD  XY, pBS, pXYBS

        ;// pQ0 -> p1 row (two rows above the edge)
        SUB     pQ0, pQ0, srcdstStep, LSL #1

;// LoopY and LoopX are the same address: every step re-enters here.
LoopY
LoopX
;//---------------Load Pixels-------------------
        ;// Two bS bytes: low byte for lanes 0-1, high byte for lanes 2-3
        LDRH    bS, [pBS], #2

        ;// Save the advanced pBS; r9 is about to be reused for q_1
        M_STR   pBS, ppBS
        M_LDR   p_1, [pQ0],srcdstStep

        ;// Z = (both bS bytes are zero); consumed by BEQ NoFilterBS0 below.
        ;// The loads in between must not alter the flags.
        CMP     bS, #0

        M_LDR   p_0, [pQ0],srcdstStep
        M_LDR   q_0, [pQ0],srcdstStep
        M_LDR   q_1, [pQ0]                  ;// pQ0 is left on the q1 row
        LDR     m01, =MASK_1                ;// 01010101 mask
        BEQ     NoFilterBS0


        ;// p_0 = [r3p0 r2p0 r1p0 r0p0]
        ;// p_1 = [r3p1 r2p1 r1p1 r0p1]
        ;// q_0 = [r3q0 r2q0 r1q0 r0q0]
        ;// q_1 = [r3q1 r2q1 r1q1 r0q1]

;//--------------Filtering Decision -------------------
        MOV     m00, #MASK_0                ;// 00000000 mask

        ;// filt holds 0x01 in each lane that is still a filter candidate.
        ;// Drop lanes 2-3 when the high bS byte is zero and lanes 0-1
        ;// when the low bS byte is zero.
        MOV     filt, m01
        TST     bS, #0xff00
        MOVEQ   filt, filt, LSR #16
        TST     bS, #0xff
        MOVEQ   filt, filt, LSL #16

        ;// Z = (bit 2 of bS is clear).  This flag must survive until
        ;// 'BEQ bSLT4'; the USUB8/SEL instructions in between only touch
        ;// the GE flags, not NZCV.
        ;// NOTE(review): only bit 2 of the LOW bS byte is tested, so both
        ;// bS bytes of the halfword are presumably expected to agree on
        ;// bS<4 versus bS==4 - confirm against the callers.
        TST     bS, #4


        ;// Check |p0-q0|<Alpha
        ;// USUB8 sets GE per lane when there is no borrow, so the SEL
        ;// after the two opposite subtractions yields |p0-q0|.
        USUB8   dp0q0, p_0, q_0
        USUB8   a, q_0, p_0
        SEL     ap0q0, a, dp0q0
        USUB8   a, ap0q0, alpha             ;// GE where |p0-q0| >= alpha
        SEL     filt, m00, filt             ;// clear those lanes in filt

        ;// Check |p1-p0|<Beta
        USUB8   dp1p0, p_1, p_0
        USUB8   a, p_0, p_1
        SEL     a, a, dp1p0
        USUB8   a, a, beta                  ;// GE where |p1-p0| >= beta
        SEL     filt, m00, filt

        ;// Check |q1-q0|<Beta
        USUB8   dq1q0, q_1, q_0
        USUB8   a, q_0, q_1
        SEL     a, a, dq1q0
        USUB8   a, a, beta                  ;// GE where |q1-q0| >= beta
        SEL     filt, m00, filt

        BEQ     bSLT4                       ;// uses Z from 'TST bS, #4'
;//-------------------Filter--------------------
bSGE4
        ;//---------bSGE4 Execution---------------
        CMP     filt, #0                    ;// Z = no lane left to filter

        M_LDR   pThresholds, ppThresholds

        ;// Compute P0b
        ;// t1 = (((p0+q1)>>1) + p1 + 1) >> 1, i.e. (2*p1 + p0 + q1 + 2)>>2.
        ;// Subtracting ~p1 (= 255-p1) with a halving subtract and then
        ;// flipping bit 7 of each lane gives the rounded average.
        UHADD8  t1, p_0, q_1
        BEQ     NoFilterFilt0               ;// uses Z from 'CMP filt, #0'
        MVN     t2, p_1
        UHSUB8  t1, t1, t2
        USUB8   t2, filt, m01               ;// GE where filt lane is set
        EOR     t1, t1, m01, LSL #7

        ;// The two threshold bytes of this step are not used in this
        ;// kernel; just step over them.
        ADD     pThresholds,pThresholds, #2

        ;// Compute Q0b
        ;// Same form as P0b with p and q swapped.  MVN (also clobbering
        ;// m00 via t3), UHADD8, UHSUB8 and EOR leave GE untouched, so both
        ;// SELs use the GE flags set by 'USUB8 t2, filt, m01'.
        UHADD8  t2, q_0, p_1
        MVN     t3, q_1
        UHSUB8  t2, t2, t3
        M_STR   pThresholds, ppThresholds
        SEL     P_0, t1, p_0                ;// filtered p0 where enabled
        EOR     t2, t2, m01, LSL #7
        SEL     Q_0, t2, q_0                ;// filtered q0 where enabled

        SUB     pQ0, pQ0, srcdstStep, LSL #1    ;// q1 row -> p0 row
        B       StoreResultAndExit

;//---------- Exit of LoopX --------------
;//---- for the case of no filtering -----

NoFilterFilt0
NoFilterBS0
        ;// Nothing is written.  Move pQ0 from the q1 row back to the p1
        ;// row and on to the next four columns; skip two threshold bytes.
        M_LDR   pThresholds, ppThresholds
        SUB     pQ0, pQ0, srcdstStep, LSL #1
        SUB     pQ0, pQ0, srcdstStep
        ADD     pQ0, pQ0, #4
        ADD     pThresholds, pThresholds, #2

        ;// Load counter for LoopX
        ;// pThresholds is stored before r6 is reloaded with alpha.
        M_LDRD  XY, pBS, pXYBS
        M_STR   pThresholds, ppThresholds
        M_LDRD  alpha, beta, pAlphaBeta0

        ;// Align the pointer
        ;// Advance the loop counter: carry clear -> same edge continues.
        ;// The flags from ADDS are also used at ExitLoopY.
        ADDS    XY, XY, XY
        M_STR   XY, pXYBS
        BCC     LoopY
        B       ExitLoopY

bSLT4
        ;//---------bSLT4 Execution---------------
        M_LDR   pThresholds, ppThresholds
        CMP     filt, #0                    ;// Z = no lane left to filter

        ;// Since beta <= 18 and alpha <= 255 we know
        ;// -254 <= p0-q0 <= 254
        ;//  -17 <= q1-q0 <= 17
        ;//  -17 <= p1-p0 <= 17

        ;// delta = Clip3( -tC, tC, ((((q0-p0)<<2) + (p1-q1) + 4)>>3))
        ;//
        ;//    Calculate A = (((q0-p0)<<2) + (p1-q1) + 4)>>3
        ;//                = (4*q0 - 4*p0 + p1 - q1 + 4)>>3
        ;//                = ((p1-p0) - (q1-q0) - 3*(p0-q0) + 4)>>3

        USUB8   t1, p_1, p_0
        USUB8   t2, q_1, q_0
        BEQ     NoFilterFilt0               ;// uses Z from 'CMP filt, #0'

        ;// Two threshold bytes: tC0 for lanes 0-1, tC1 for lanes 2-3.
        ;// tC1 shares r12 with t2, so t2 is consumed before it is loaded.
        LDRB    tC0, [pThresholds],#1
        SSUB8   t1, t1, t2                  ;// (p1-p0) - (q1-q0)
        LDRB    tC1, [pThresholds],#1
        M_STR   pThresholds, ppThresholds
        UHSUB8  t4, p_0, q_0                ;// (p0-q0)>>1
        ORR     tC, tC0, tC1, LSL #16
        USUB8   t5, p_0, q_0
        AND     t5, t5, m01                 ;// bit 0 of (p0-q0) per lane
        SHSUB8  t1, t1, t5
        ORR     tC, tC, LSL #8              ;// tC = [tC1 tC1 tC0 tC0]
        SSUB8   t1, t1, t5
        SHSUB8  t1, t1, t4
        UQADD8  tC, tC, m01                 ;// per-lane threshold + 1
        SADD8   t1, t1, m01
        USUB8   t5, filt, m01               ;// GE where filt lane is set
        SHSUB8  t1, t1, t4                  ;// t1 = A (signed, per lane)
        SEL     tC, tC, m00                 ;// tC = 0 in disabled lanes

        ;// Split into positive and negative part and clip
        ;// pos = min(max(A,0), tC), neg = min(max(-A,0), tC).
        ;// t3 shares r14 with m00; m00 is not needed after the SELs that
        ;// read it.

        SSUB8   t1, t1, m00                 ;// GE where A >= 0
        SEL     pos, t1, m00
        USUB8   neg, pos, t1
        USUB8   t3, pos, tC                 ;// GE where pos >= tC
        SEL     pos, tC, pos
        USUB8   t3, neg, tC                 ;// GE where neg >= tC
        SEL     neg, tC, neg
        ;// Apply the clipped delta with unsigned saturation
        UQADD8  P_0, p_0, pos
        UQSUB8  Q_0, q_0, pos
        UQSUB8  P_0, P_0, neg
        UQADD8  Q_0, Q_0, neg

        SUB     pQ0, pQ0, srcdstStep, LSL #1    ;// q1 row -> p0 row

        ;// Choose to store the filtered
        ;// value or the original pixel
        USUB8   t1, filt, m01               ;// GE where filt lane is set
        SEL     P_0, P_0, p_0
        SEL     Q_0, Q_0, q_0

StoreResultAndExit

        ;//---------Store result---------------

        ;// P_0 = [r0p0 r1p0 r2p0 r3p0]
        ;// Q_0 = [r0q0 r1q0 r2q0 r3q0]

        ;// Write the p0 row, then the q0 row; pQ0 ends on the q0 row,
        ;// four columns to the right.
        M_STR   P_0, [pQ0], srcdstStep
        STR     Q_0, [pQ0], #4

        ;// Reload the counter, pBS and the limits: r6-r9 were reused for
        ;// P_0, Q_0, q_0 and q_1 during this step.
        M_LDRD  XY, pBS, pXYBS
        M_LDRD  alpha, beta, pAlphaBeta0

        SUB     pQ0, pQ0, srcdstStep, LSL #1    ;// back to the p1 row

        ;// Advance the loop counter: carry clear -> same edge continues
        ADDS    XY, XY, XY
        M_STR   XY, pXYBS
        BCC     LoopX

;//-------- Common Exit of LoopY -----------------
        ;// Align the pointers

ExitLoopY
        ;// None of the instructions before BNE alters the flags, so BNE
        ;// still sees Z from the preceding ADDS (set once all steps are
        ;// done).
        ADD     pBS, pBS, #4                    ;// bS offset 4 -> 8
        M_LDRD  alpha, beta, pAlphaBeta1        ;// limits for second edge
        SUB     pQ0, pQ0, #8                    ;// back to column 0
        ADD     pQ0, pQ0, srcdstStep, LSL #2    ;// four rows down
        M_STRD  alpha, beta, pAlphaBeta0        ;// make them current

        BNE     LoopY
        MOV     r0, #OMX_Sts_NoErr

;//-----------------End Filter--------------------
        M_END

        ENDIF

        END