;//
;//
;// File Name:  omxVCM4P10_FilterDeblockingChroma_VerEdge_I_s.s
;// OpenMAX DL: v1.0.2
;// Revision:   9641
;// Date:       Thursday, February 7, 2008
;//
;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
;//
;//
;//

;//-----------------------------------------------------------------------
;// omxVCM4P10_FilterDeblockingChroma_VerEdge_I
;//
;// In-place deblocking of the vertical edges of a chroma block, written
;// for the ARM1136JS variant with the ARMv6 packed-byte (SIMD)
;// instructions.  Syntax: ARM armasm; relies on the M_* macros from
;// armCOMM_s.h.
;//
;// In:    r0      = pSrcDst      pixel pointer; the byte at pSrcDst is q0,
;//                               p0/p1 are at -1/-2 and q1 is at +1
;//        r1      = srcdstStep   byte distance between rows
;//        r2      = pAlphaArg    2 bytes: alpha for edge 0, alpha for edge 1
;//        r3      = pBetaArg     2 bytes: beta  for edge 0, beta  for edge 1
;//        [stack] = ppThresholdsArg  pointer to the tC0 threshold bytes
;//        [stack] = ppBSArg          pointer to the bS (strength) bytes
;// Out:   r0      = OMX_Sts_NoErr
;//
;// Iteration structure (driven by the XY shift counter, see LOOP_COUNT):
;//   2 outer passes (LoopY: rows 0-3, then rows 4-7)
;//     x 2 inner passes (LoopX: edge at column 0, then at column +4).
;// Each pass handles 4 rows at once, one row per byte lane.  Lane order:
;// row 0 is the most significant byte, row 3 the least significant.
;//
;// The halfword pixel loads assume little-endian byte order: the filtered
;// q0 is written back to the address whose byte was the LOW byte of the
;// halfword that was loaded.
;//
;// NOTE(review): the code uses r12 and r14 as scratch although M_START
;// only names r11; presumably the macro also preserves lr - confirm in
;// armCOMM_s.h.
;//-----------------------------------------------------------------------

        INCLUDE omxtypes_s.h
        INCLUDE armCOMM_s.h

        M_VARIANTS ARM1136JS


        IF ARM1136JS


MASK_0      EQU 0x00000000      ;// all lanes zero
MASK_1      EQU 0x01010101      ;// 1 in every byte lane (replicate/compare)
MASK_2      EQU 0x0000ff00      ;// selects bits 15:8 during unpacking

;// Loop counter seed.  The counter is doubled (ADDS XY, XY, XY) after
;// every 4-row pass:
;//   0x50000000 -> 0xA0000000 (C=0)       : run second LoopX pass
;//              -> 0x40000000 (C=1, Z=0)  : LoopX done, another LoopY pass
;//              -> 0x80000000 (C=0)       : run second LoopX pass
;//              -> 0x00000000 (C=1, Z=1)  : all done
LOOP_COUNT  EQU 0x50000000

;// Register aliases.  Many names share one physical register; an alias
;// is only meaningful while the value it names is live.

;// Declare input registers

pSrcDst     RN 0
srcdstStep  RN 1
pAlphaArg   RN 2
pBetaArg    RN 3

pThresholds RN 6
pBS         RN 9
pQ0         RN 0
bS          RN 2
bSTemp      RN 10

alpha       RN 6
alpha0      RN 6
alpha1      RN 8

beta        RN 7
beta0       RN 7
beta1       RN 9

;// Declare Local/Temporary variables

;// Pixels
p_0         RN 3
p_1         RN 5
q_0         RN 8
q_1         RN 9

;// Unpacking
mask        RN 11

row0        RN 2
row1        RN 4
row2        RN 5
row3        RN 3

row4        RN 8
row5        RN 9
row6        RN 10
row7        RN 12

tunpk0      RN 2
tunpk2      RN 10
tunpk3      RN 12

tunpk4      RN 4
tunpk5      RN 5
tunpk6      RN 14
tunpk7      RN 2

;// Filtering

dp0q0       RN 12
dp1p0       RN 12
dq1q0       RN 12

ap0q0       RN 4
filt        RN 2

m00         RN 14
m01         RN 11

;// (pQ0 is declared a second time here with the same register)
pQ0         RN 0
Step        RN 1

;// Output

P_0         RN 6
Q_0         RN 7

;//Declarations for bSLT4 kernel

tC          RN 12
tC0         RN 5
tC1         RN 12
pos         RN 5
neg         RN 9

;//Declarations for bSGE4 kernel


;// Miscellaneous
XY          RN 8

a           RN 10
t1          RN 10
t2          RN 12
t3          RN 14
t4          RN 6
t5          RN 5


        ;// Allocate stack memory
        ;// NOTE(review): XY and pBS are written/read as a pair through
        ;// pXYBS (M_STRD/M_LDRD) while the advanced pBS is stored through
        ;// ppBS.  This only works if ppBS is placed directly after the
        ;// pXYBS slot, so the order of these two allocations must not be
        ;// changed - confirm against M_ALLOC* in armCOMM_s.h.
        M_ALLOC4 ppThresholds,4
        M_ALLOC8 pAlphaBeta0,8
        M_ALLOC8 pAlphaBeta1,8
        M_ALLOC8 pXYBS,4
        M_ALLOC4 ppBS,4

        ;// Function header
        M_START omxVCM4P10_FilterDeblockingChroma_VerEdge_I, r11

        ;//Input arguments on the stack
        M_ARG   ppThresholdsArg, 4
        M_ARG   ppBSArg, 4

        ;// Load the two alpha/beta bytes and replicate each into all
        ;// four byte lanes (x * 0x01010101) for the packed compares.
        LDRB    alpha1, [pAlphaArg,#1]
        LDRB    beta1,  [pBetaArg,#1]
        M_LDR   pThresholds, ppThresholdsArg
        LDR     a,=MASK_1
        LDRB    beta0,  [pBetaArg]
        M_STR   pThresholds, ppThresholds
        LDRB    alpha0, [pAlphaArg]

        MUL     alpha1, alpha1, a
        MUL     beta1, beta1, a
        MUL     alpha0, alpha0, a
        MUL     beta0, beta0, a

        ;// alpha1/beta1 share r8/r9 with XY/pBS, so they are spilled
        ;// before those registers are reused.  alpha0/beta0 stay live in
        ;// r6/r7 (= alpha/beta) for the first pass.
        M_STRD  alpha1, beta1, pAlphaBeta1
        M_LDR   pBS, ppBSArg
        M_STRD  alpha0, beta0, pAlphaBeta0

        LDR     XY,=LOOP_COUNT
        M_STRD  XY, pBS, pXYBS


;// LoopY and LoopX label the same address; which loop a pass belongs
;// to is decided only by the C/Z flags of the XY counter update.
LoopY
LoopX
;//---------------Load Pixels-------------------

;//----------------Pack q0-q1-----------------------
        ;// One halfword = the two bS bytes governing this 4-row pass;
        ;// pBS advances by 8 per LoopX pass.
        LDRH    bS, [pBS], #8
        LDR     mask, =MASK_2

        M_LDRH  row4, [pQ0], srcdstStep
        CMP     bS, #0
        M_STR   pBS, ppBS
        M_LDRH  row5, [pQ0], srcdstStep
        BEQ.W   NoFilterBS0                 ;// both bS bytes zero: skip pass
        LDRH    row6, [pQ0]
        LDRH    row7, [pQ0, srcdstStep]

        ;// Register contents, most significant byte first
        ;// (little-endian load: q0 = low byte, q1 = high byte):
        ;// row4         = [0 0 r0q1 r0q0]
        ;// row5         = [0 0 r1q1 r1q0]
        ;// row6         = [0 0 r2q1 r2q0]
        ;// row7         = [0 0 r3q1 r3q0]

        AND     tunpk4, mask, row4
        AND     tunpk5, mask, row4, LSL#8
        UXTAB   tunpk4, tunpk4, row5, ROR#8
        UXTAB   tunpk5, tunpk5, row5
        AND     tunpk6, mask, row6
        AND     tunpk7, mask, row6, LSL#8
        UXTAB   tunpk6, tunpk6, row7, ROR#8
        UXTAB   tunpk7, tunpk7, row7

        ;// tunpk4       = [0 0 r0q1 r1q1]
        ;// tunpk5       = [0 0 r0q0 r1q0]
        ;// tunpk6       = [0 0 r2q1 r3q1]
        ;// tunpk7       = [0 0 r2q0 r3q0]

        ;// Rewind the two post-incremented rows and step back 2 bytes
        ;// so pQ0 points at p1 of row 0.
        SUB     pQ0, pQ0, srcdstStep, LSL #1
        SUB     pQ0, pQ0, #2

        PKHBT   q_1, tunpk6, tunpk4, LSL#16
        PKHBT   q_0, tunpk7, tunpk5, LSL#16

        ;// q_0 = [r0q0 r1q0 r2q0 r3q0]
        ;// q_1 = [r0q1 r1q1 r2q1 r3q1]


;//----------------Pack p0-p1-----------------------

        M_LDRH  row0, [pQ0], srcdstStep
        M_LDRH  row1, [pQ0], srcdstStep
        LDRH    row2, [pQ0]
        LDRH    row3, [pQ0, srcdstStep]

        ;// Most significant byte first (p0 = high byte, p1 = low byte):
        ;// row0 = [0 0 r0p0 r0p1]
        ;// row1 = [0 0 r1p0 r1p1]
        ;// row2 = [0 0 r2p0 r2p1]
        ;// row3 = [0 0 r3p0 r3p1]

        AND     tunpk2, mask, row0
        AND     tunpk6, mask, row0, LSL#8
        UXTAB   tunpk2, tunpk2, row1, ROR#8
        UXTAB   tunpk6, tunpk6, row1

        AND     tunpk0, mask, row2
        AND     tunpk3, mask, row2, LSL#8
        UXTAB   tunpk0, tunpk0, row3, ROR#8
        UXTAB   tunpk3, tunpk3, row3

        ;// tunpk2 = [0 0 r0p0 r1p0]
        ;// tunpk6 = [0 0 r0p1 r1p1]
        ;// tunpk0 = [0 0 r2p0 r3p0]
        ;// tunpk3 = [0 0 r2p1 r3p1]

        PKHBT   p_0, tunpk0, tunpk2, LSL#16
        M_LDR   bSTemp, ppBS
        PKHBT   p_1, tunpk3, tunpk6, LSL#16

        ;// p_0 = [r0p0 r1p0 r2p0 r3p0]
        ;// p_1 = [r0p1 r1p1 r2p1 r3p1]

;//--------------Filtering Decision -------------------
        ;// filt holds 0x01 in every lane that is to be filtered.
        ;// USUB8 sets the per-lane GE flags (GE=1 when no borrow); SEL
        ;// then picks its first operand in lanes where GE=1.
        USUB8   dp0q0, p_0, q_0
        LDR     m01, =MASK_1
        LDRH    bSTemp, [bSTemp ,#-8]       ;// re-read this pass's bS pair
        MOV     m00, #MASK_0                ;//  00000000 mask

        ;// High bS byte zero -> keep only rows 0-1 (upper two lanes);
        ;// low  bS byte zero -> keep only rows 2-3 (lower two lanes).
        MOV     filt, m01
        TST     bSTemp, #0xff00
        MOVEQ   filt, filt, LSL #16
        TST     bSTemp, #0xff
        MOVEQ   filt, filt, LSR #16
        ;// Z from this test (bit 2 of the low bS byte) selects the
        ;// kernel at "BEQ bSLT4" below; nothing in between writes NZCV.
        TST     bSTemp, #4

        ;// Check |p0-q0|<Alpha
        USUB8   a, q_0, p_0
        SEL     ap0q0, a, dp0q0             ;// ap0q0 = |p0-q0| per lane
        USUB8   a, ap0q0, alpha
        SEL     filt, m00, filt             ;// clear lanes with |p0-q0|>=alpha

        ;// Check |p1-p0|<Beta
        USUB8   dp1p0, p_1, p_0
        USUB8   a, p_0, p_1
        SEL     a, a, dp1p0
        USUB8   a, a, beta
        SEL     filt, m00, filt

        ;// Check |q1-q0|<Beta
        USUB8   dq1q0, q_1, q_0
        USUB8   a, q_0, q_1
        SEL     a, a, dq1q0
        USUB8   a, a, beta
        SEL     filt, m00, filt

        BEQ     bSLT4
;//-------------------Filter--------------------
bSGE4
        ;//---------bSGE4 Execution---------------
        ;// Strong filter, per lane:
        ;//   P0 = (2*p1 + p0 + q1 + 2) >> 2
        ;//   Q0 = (2*q1 + q0 + p1 + 2) >> 2
        ;// built as ((x+y)>>1 - ~z)>>1 with the 0x80 bias removed by EOR.
        CMP     filt, #0

        M_LDR   pThresholds, ppThresholds

        ;// Compute P0b
        UHADD8  t1, p_0, q_1
        BEQ     NoFilterFilt0               ;// no lane passed the checks
        MVN     t2, p_1
        UHSUB8  t1, t1, t2
        USUB8   t2, filt, m01               ;// result unused: sets GE=1 in
                                            ;// lanes where filt==1 for the
                                            ;// SELs below
        EOR     t1, t1, m01, LSL #7

        ;// Thresholds are not read by this kernel but the pointer still
        ;// advances by 4 per pass, matching the bSLT4 path.
        ADD     pThresholds,pThresholds, #4

        ;// Compute Q0b
        UHADD8  t2, q_0, p_1
        MVN     t3, q_1
        UHSUB8  t2, t2, t3
        M_STR   pThresholds, ppThresholds   ;// must precede SEL: P_0 is r6 too
        SEL     P_0, t1, p_0
        EOR     t2, t2, m01, LSL #7
        SEL     Q_0, t2, q_0

        B       StoreResultAndExit

;//---------- Exit of LoopX --------------
;//---- for the case of no filtering -----

NoFilterFilt0
        ADD     pQ0, pQ0, #2                ;// undo the -2 (p1) offset
NoFilterBS0
        M_LDR   pThresholds, ppThresholds
        SUB     pQ0, pQ0, srcdstStep, LSL #1
        ADD     pQ0, pQ0, #4                ;// next edge: 4 pixels right
        ADD     pThresholds, pThresholds, #4
        ;// Load counter for LoopX
        M_LDRD  XY, pBS, pXYBS
        M_STR   pThresholds, ppThresholds
        M_LDRD  alpha, beta, pAlphaBeta1    ;// second edge uses alpha1/beta1

        ;// Align the pointer
        ADDS    XY, XY, XY                  ;// C=1 ends LoopX, Z=1 ends LoopY
        M_STR   XY, pXYBS
        BCC     LoopY                       ;// same address as LoopX
        B       ExitLoopY

bSLT4
        ;//---------bSLT4 Execution---------------
        M_LDR   pThresholds, ppThresholds
        CMP     filt, #0


        ;// Since beta <= 18 and alpha <= 255 we know
        ;// -254 <= p0-q0 <= 254
        ;//  -17 <= q1-q0 <= 17
        ;//  -17 <= p1-p0 <= 17

        ;// delta = Clip3( -tC, tC, ((((q0-p0)<<2) + (p1-q1) + 4)>>3))
        ;//
        ;//    Calculate A = (((q0-p0)<<2) + (p1-q1) + 4)>>3
        ;//                = (4*q0 - 4*p0 + p1 - q1 + 4)>>3
        ;//                = ((p1-p0) - (q1-q0) - 3*(p0-q0) + 4)>>3

        USUB8   t1, p_1, p_0
        USUB8   t2, q_1, q_0
        BEQ     NoFilterFilt0               ;// no lane passed the checks

        ;// Two threshold bytes per pass (rows 0-1, rows 2-3); the
        ;// pointer advances by 4 in total.
        LDRB    tC0, [pThresholds], #1
        SSUB8   t1, t1, t2
        LDRB    tC1, [pThresholds], #3
        M_STR   pThresholds, ppThresholds
        UHSUB8  t4, p_0, q_0
        ORR     tC, tC1, tC0, LSL #16
        USUB8   t5, p_0, q_0
        AND     t5, t5, m01
        SHSUB8  t1, t1, t5
        ORR     tC, tC, LSL #8              ;// tC = [tC0 tC0 tC1 tC1]
        SSUB8   t1, t1, t5
        SHSUB8  t1, t1, t4
        UQADD8  tC, tC, m01                 ;// tC = tC0 + 1 (saturating)
        SADD8   t1, t1, m01
        USUB8   t5, filt, m01               ;// GE=1 in lanes where filt==1
        SHSUB8  t1, t1, t4
        SEL     tC, tC, m00                 ;// tC = 0 in disabled lanes

        ;// Split into positive and negative part and clip

        SSUB8   t1, t1, m00                 ;// GE=1 where t1 >= 0 (signed)
        SEL     pos, t1, m00
        USUB8   neg, pos, t1
        USUB8   t3, pos, tC
        SEL     pos, tC, pos                ;// pos = min(pos, tC)
        USUB8   t3, neg, tC
        SEL     neg, tC, neg                ;// neg = min(neg, tC)
        UQADD8  P_0, p_0, pos
        UQSUB8  Q_0, q_0, pos
        UQSUB8  P_0, P_0, neg
        UQADD8  Q_0, Q_0, neg

        ;// Choose to store the filtered
        ;// value or the original pixel
        USUB8   t1, filt, m01
        SEL     P_0, P_0, p_0
        SEL     Q_0, Q_0, q_0

StoreResultAndExit

        ;//---------Store result---------------

        ;// P_0 = [r0p0 r1p0 r2p0 r3p0]
        ;// Q_0 = [r0q0 r1q0 r2q0 r3q0]

        ;// pQ0 currently points at p1 of row 2; move to p0 of row 0.
        SUB     pQ0, pQ0, srcdstStep, LSL #1
        ADD     pQ0, pQ0, #1

        ;// STRB writes only the low byte of t1, so a plain LSR is enough
        ;// to extract each lane.
        MOV     t1, Q_0, LSR #24
        STRB    t1, [pQ0, #1]
        MOV     t1, P_0, LSR #24
        M_STRB  t1, [pQ0], srcdstStep

        MOV     t1, Q_0, LSR #16
        STRB    t1, [pQ0, #1]
        MOV     t1, P_0, LSR #16
        M_STRB  t1, [pQ0], srcdstStep

        MOV     t1, P_0, LSR #8
        STRB    t1, [pQ0]
        STRB    P_0, [pQ0, srcdstStep]
        MOV     t1, Q_0, LSR #8
        STRB    t1, [pQ0, #1]!              ;// writeback: pQ0 -> q0 of row 2
        STRB    Q_0, [pQ0, srcdstStep]

        M_LDRD  XY, pBS, pXYBS
        M_LDRD  alpha, beta, pAlphaBeta1    ;// second edge uses alpha1/beta1

        SUB     pQ0, pQ0, srcdstStep, LSL #1
        ADD     pQ0, pQ0, #4                ;// next edge: 4 pixels right

        ADDS    XY, XY, XY                  ;// C=1 ends LoopX, Z=1 ends LoopY
        M_STR   XY, pXYBS
        BCC     LoopX

;//-------- Common Exit of LoopY -----------------
        ;// Align the pointers

ExitLoopY

        ;// Two LoopX passes moved pQ0 by +8, pBS by +16 and pThresholds
        ;// by +8.  Rewind so the next LoopY pass starts 4 rows down with
        ;// a net advance of 2 in both pBS and pThresholds.
        ;// None of these instructions writes NZCV, so Z from the last
        ;// ADDS is still valid at BNE.
        M_LDR   pThresholds, ppThresholds
        SUB     pQ0, pQ0, #8
        ADD     pQ0, pQ0, srcdstStep, LSL #2
        SUB     pBS, pBS, #14
        SUB     pThresholds, pThresholds, #6
        M_STR   pThresholds, ppThresholds

        M_LDRD  alpha, beta, pAlphaBeta0    ;// first edge uses alpha0/beta0

        BNE     LoopY
        MOV     r0, #OMX_Sts_NoErr
;//-----------------End Filter--------------------

        M_END

        ENDIF

        END