;//
;//
;// File Name:  omxVCM4P10_FilterDeblockingChroma_HorEdge_I_s.s
;// OpenMAX DL: v1.0.2
;// Revision:   9641
;// Date:       Thursday, February 7, 2008
;//
;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
;//
;//
;//

;//---------------------------------------------------------------------
;// omxVCM4P10_FilterDeblockingChroma_HorEdge_I
;//
;// In-place deblocking of chroma pixels across horizontal edges, using
;// the ARMv6 packed-byte (SIMD-in-register) instructions. Each pass of
;// the loop handles one 32-bit word (four byte lanes) from each of the
;// four rows p1, p0, q0, q1 around an edge and rewrites the p0 and q0
;// rows.
;//
;// Inputs (see the RN declarations and M_ARG lines below):
;//   r0      pSrcDst     pointer to the q0 row of the first edge
;//   r1      srcdstStep  distance in bytes between rows
;//   r2      pAlphaArg   two alpha bytes: [0] first edge, [1] second edge
;//   r3      pBetaArg    two beta bytes:  [0] first edge, [1] second edge
;//   stack   ppThresholdsArg  pointer to the tC0 threshold bytes
;//   stack   ppBSArg          pointer to the boundary-strength bytes
;//
;// Returns:
;//   r0 = OMX_Sts_NoErr
;//
;// Loop structure:
;//   LOOP_COUNT (0x50000000) is doubled with ADDS once per pass. Carry
;//   is set on the 2nd and 4th doubling and the value reaches zero on
;//   the 4th, so the body runs four times: two words along an edge
;//   (inner loop), for two edges (outer loop). The second edge is
;//   4 * srcdstStep below the first (see ExitLoopY).
;//
;// NOTE(review): register save/restore, the stack frame and the exact
;// behaviour of the M_* macros come from armCOMM_s.h, which is not
;// visible here - confirm there before changing the stack layout.
;//---------------------------------------------------------------------

        INCLUDE omxtypes_s.h
        INCLUDE armCOMM_s.h

        M_VARIANTS ARM1136JS


        IF ARM1136JS

MASK_0      EQU 0x00000000          ;// all four byte lanes = 0x00
MASK_1      EQU 0x01010101          ;// all four byte lanes = 0x01
LOOP_COUNT  EQU 0x50000000          ;// shift-out loop counter (see header)

;// Declare input registers
;//
;// Many names below alias the same physical register. A name is only
;// valid while no other alias of that register has been written.

pSrcDst     RN 0
srcdstStep  RN 1
pAlphaArg   RN 2
pBetaArg    RN 3

pThresholds RN 6
pBS         RN 9
pQ0         RN 0
bS          RN 10

alpha       RN 6
alpha0      RN 6
alpha1      RN 8

beta        RN 7
beta0       RN 7
beta1       RN 9

;// Declare Local/Temporary variables

;// Pixels
p_0         RN 3
p_1         RN 5
q_0         RN 8
q_1         RN 9

;// Filtering

dp0q0       RN 12
dp1p0       RN 12
dq1q0       RN 12

ap0q0       RN 4
filt        RN 2

m00         RN 14
m01         RN 11

pQ0         RN 0
Step        RN 1

;// Output

P_0         RN 6
Q_0         RN 7

;//Declarations for bSLT4 kernel

tC          RN 12
tC0         RN 5
tC1         RN 12
pos         RN 5
neg         RN 9

;//Declarations for bSGE4 kernel


;// Miscellaneous
XY          RN 8

a           RN 10
t1          RN 10
t2          RN 12
t3          RN 14
t4          RN 6
t5          RN 5


        ;// Allocate stack memory
        ;// NOTE(review): pXYBS is declared with 4 bytes but is accessed
        ;// with M_STRD/M_LDRD (two registers: XY and pBS). The second
        ;// word appears to be intended to coincide with the ppBS slot
        ;// declared right after it, so that "M_STR pBS, ppBS" updates
        ;// the pBS value later read back by "M_LDRD XY, pBS, pXYBS".
        ;// Confirm against M_ALLOC4/M_ALLOC8 in armCOMM_s.h before
        ;// reordering or resizing these allocations.
        M_ALLOC4 ppThresholds,4
        M_ALLOC8 pAlphaBeta0,8
        M_ALLOC8 pAlphaBeta1,8
        M_ALLOC8 pXYBS,4
        M_ALLOC4 ppBS,4

        ;// Function header
        ;// (r11 presumably names the highest callee-saved register the
        ;// macro must preserve - TODO confirm in armCOMM_s.h)
        M_START omxVCM4P10_FilterDeblockingChroma_HorEdge_I, r11

        ;//Input arguments on the stack
        M_ARG   ppThresholdsArg, 4
        M_ARG   ppBSArg, 4

        ;// Load the alpha/beta bytes for both edges and replicate each
        ;// into all four byte lanes (byte * 0x01010101).
        LDRB    alpha1, [pAlphaArg,#1]
        LDRB    beta1,  [pBetaArg,#1]
        M_LDR   pThresholds, ppThresholdsArg
        LDR     a,=MASK_1
        LDRB    beta0,  [pBetaArg]
        M_STR   pThresholds, ppThresholds       ;// keep a working copy on the stack
        LDRB    alpha0, [pAlphaArg]             ;// overwrites r6 (pThresholds), already saved

        MUL     alpha1, alpha1, a
        MUL     beta1, beta1, a
        MUL     alpha0, alpha0, a
        MUL     beta0, beta0, a

        M_STRD  alpha1, beta1, pAlphaBeta1      ;// second-edge pair, fetched in ExitLoopY
        M_LDR   pBS, ppBSArg                    ;// r9: beta1 already stored above
        M_STRD  alpha0, beta0, pAlphaBeta0      ;// current pair, reloaded every pass

        LDR     XY,=LOOP_COUNT
        M_STRD  XY, pBS, pXYBS

        ;// Step back two rows: pQ0 now addresses the p1 row
        SUB     pQ0, pQ0, srcdstStep, LSL #1
LoopY
LoopX
;//---------------Load Pixels-------------------
        ;// One halfword = two bS bytes; each byte governs two of the
        ;// four pixel lanes (see the filt mask set-up below).
        LDRH    bS, [pBS], #2

        M_STR   pBS, ppBS
        M_LDR   p_1, [pQ0],srcdstStep

        CMP     bS, #0                          ;// Z consumed by BEQ NoFilterBS0 below

        M_LDR   p_0, [pQ0],srcdstStep
        M_LDR   q_0, [pQ0],srcdstStep
        M_LDR   q_1, [pQ0]                      ;// pQ0 is left on the q1 row
        LDR     m01, =MASK_1                    ;//  01010101 mask
        BEQ     NoFilterBS0                     ;// both bS bytes zero: nothing to do


        ;// p_0 = [r3p0 r2p0 r1p0 r0p0]
        ;// p_1 = [r3p1 r2p1 r1p1 r0p1]
        ;// q_0 = [r3q0 r2q0 r1q0 r0q0]
        ;// q_1 = [r3q1 r2q1 r1q1 r0q1]

;//--------------Filtering Decision -------------------
        ;// filt holds 0x01 in every lane that is still a candidate
        ;// for filtering and 0x00 in lanes that are excluded.
        MOV     m00, #MASK_0                    ;//  00000000 mask

        MOV     filt, m01
        TST     bS, #0xff00
        MOVEQ   filt, filt, LSR #16             ;// second bS byte == 0: drop lanes 2,3
        TST     bS, #0xff
        MOVEQ   filt, filt, LSL #16             ;// first bS byte == 0: drop lanes 0,1
        TST     bS, #4                          ;// Z consumed by "BEQ bSLT4" below; the
                                                ;// packed-byte ops in between leave NZCV
                                                ;// untouched (they only write GE)


        ;// Check |p0-q0|<Alpha
        ;// USUB8 sets GE per lane when no borrow occurs; SEL then
        ;// picks the non-negative difference, giving |p0-q0|.
        USUB8   dp0q0, p_0, q_0
        USUB8   a, q_0, p_0
        SEL     ap0q0, a, dp0q0
        USUB8   a, ap0q0, alpha                 ;// GE set where |p0-q0| >= alpha
        SEL     filt, m00, filt                 ;// clear filt in those lanes

        ;// Check |p1-p0|<Beta
        USUB8   dp1p0, p_1, p_0
        USUB8   a, p_0, p_1
        SEL     a, a, dp1p0
        USUB8   a, a, beta
        SEL     filt, m00, filt

        ;// Check |q1-q0|<Beta
        USUB8   dq1q0, q_1, q_0
        USUB8   a, q_0, q_1
        SEL     a, a, dq1q0
        USUB8   a, a, beta
        SEL     filt, m00, filt

        BEQ     bSLT4                           ;// bit 2 of bS clear -> bS < 4 kernel
;//-------------------Filter--------------------
bSGE4
        ;//---------bSGE4 Execution---------------
        CMP     filt, #0                        ;// Z consumed by BEQ NoFilterFilt0 below

        M_LDR   pThresholds, ppThresholds       ;// r6: alpha is no longer needed this pass

        ;// Compute P0b
        ;// (presumably (2*p1 + p0 + q1 + 2)>>2 built from halving
        ;// adds/subtracts; the EOR with 0x80 per lane compensates for
        ;// the MVN bias - TODO confirm)
        UHADD8  t1, p_0, q_1
        BEQ     NoFilterFilt0                   ;// no lane passed the alpha/beta tests
        MVN     t2, p_1
        UHSUB8  t1, t1, t2
        USUB8   t2, filt, m01                   ;// GE set in lanes where filt == 1; the
                                                ;// halving ops below do not change GE
        EOR     t1, t1, m01, LSL #7

        ADD     pThresholds,pThresholds, #2     ;// thresholds unused here, skip two bytes

        ;// Compute Q0b
        UHADD8  t2, q_0, p_1
        MVN     t3, q_1                         ;// t3 is r14 (m00): m00 is dead from here
        UHSUB8  t2, t2, t3
        M_STR   pThresholds, ppThresholds       ;// must precede the write to P_0 (also r6)
        SEL     P_0, t1, p_0                    ;// filtered value only where filt == 1
        EOR     t2, t2, m01, LSL #7
        SEL     Q_0, t2, q_0

        SUB     pQ0, pQ0, srcdstStep, LSL #1    ;// q1 row -> p0 row for the store
        B       StoreResultAndExit

;//---------- Exit of LoopX --------------
;//---- for the case of no filtering -----

NoFilterFilt0
NoFilterBS0
        M_LDR   pThresholds, ppThresholds
        ;// pQ0 is on the q1 row: go back three rows to p1 and advance
        ;// four bytes to the next word along the edge.
        SUB     pQ0, pQ0, srcdstStep, LSL #1
        SUB     pQ0, pQ0, srcdstStep
        ADD     pQ0, pQ0, #4
        ADD     pThresholds, pThresholds, #2    ;// skip the two unused threshold bytes

        ;// Load counter for LoopX
        M_LDRD  XY, pBS, pXYBS
        M_STR   pThresholds, ppThresholds       ;// before r6 is reloaded as alpha
        M_LDRD  alpha, beta, pAlphaBeta0

        ;// Align the pointer
        ADDS    XY, XY, XY                      ;// C: end of edge; Z: all edges done
        M_STR   XY, pXYBS
        BCC     LoopY                           ;// same address as LoopX
        B       ExitLoopY                       ;// Z from ADDS is tested there by BNE

bSLT4
        ;//---------bSLT4 Execution---------------
        M_LDR   pThresholds, ppThresholds
        CMP     filt, #0                        ;// Z consumed by BEQ NoFilterFilt0 below

        ;// Since beta <= 18 and alpha <= 255 we know
        ;// -254 <= p0-q0 <= 254
        ;//  -17 <= q1-q0 <= 17
        ;//  -17 <= p1-p0 <= 17

        ;// delta = Clip3( -tC, tC, ((((q0-p0)<<2) + (p1-q1) + 4)>>3))
        ;//
        ;//    Calculate A = (((q0-p0)<<2) + (p1-q1) + 4)>>3
        ;//                = (4*q0 - 4*p0 + p1 - q1 + 4)>>3
        ;//                = ((p1-p0) - (q1-q0) - 3*(p0-q0) + 4)>>3

        USUB8   t1, p_1, p_0
        USUB8   t2, q_1, q_0
        BEQ     NoFilterFilt0                   ;// no lane passed the alpha/beta tests

        LDRB    tC0, [pThresholds],#1           ;// r5: p_1 is dead from here
        SSUB8   t1, t1, t2                      ;// (p1-p0) - (q1-q0)
        LDRB    tC1, [pThresholds],#1           ;// r12: t2 already consumed above
        M_STR   pThresholds, ppThresholds       ;// before r6 is reused as t4
        UHSUB8  t4, p_0, q_0
        ORR     tC, tC0, tC1, LSL #16           ;// tC0 in lane 0, tC1 in lane 2
        USUB8   t5, p_0, q_0
        AND     t5, t5, m01
        SHSUB8  t1, t1, t5
        ORR     tC, tC, LSL #8                  ;// duplicate into lanes 1 and 3
        SSUB8   t1, t1, t5
        SHSUB8  t1, t1, t4
        UQADD8  tC, tC, m01                     ;// tC = tC0 + 1 per lane, saturating
        SADD8   t1, t1, m01
        USUB8   t5, filt, m01                   ;// GE set in lanes where filt == 1
        SHSUB8  t1, t1, t4
        SEL     tC, tC, m00                     ;// tC forced to 0 where filt == 0

        ;// Split into positive and negative part and clip

        SSUB8   t1, t1, m00                     ;// GE set in lanes where t1 >= 0
        SEL     pos, t1, m00                    ;// pos = max(t1, 0)
        USUB8   neg, pos, t1                    ;// neg = pos - t1 = max(-t1, 0)
        USUB8   t3, pos, tC                     ;// GE where pos >= tC (t3 = r14: m00 dead)
        SEL     pos, tC, pos                    ;// pos = min(pos, tC)
        USUB8   t3, neg, tC
        SEL     neg, tC, neg                    ;// neg = min(neg, tC)
        UQADD8  P_0, p_0, pos
        UQSUB8  Q_0, q_0, pos
        UQSUB8  P_0, P_0, neg
        UQADD8  Q_0, Q_0, neg

        SUB     pQ0, pQ0, srcdstStep, LSL #1    ;// q1 row -> p0 row for the store

        ;// Choose to store the filtered
        ;// value or the original pixel
        USUB8   t1, filt, m01
        SEL     P_0, P_0, p_0
        SEL     Q_0, Q_0, q_0

StoreResultAndExit

        ;//---------Store result---------------

        ;// P_0 = [r0p0 r1p0 r2p0 r3p0]
        ;// Q_0 = [r0q0 r1q0 r2q0 r3q0]

        M_STR   P_0, [pQ0], srcdstStep          ;// write p0 row, move to q0 row
        STR     Q_0, [pQ0], #4                  ;// write q0 row, advance to next word

        M_LDRD  XY, pBS, pXYBS
        M_LDRD  alpha, beta, pAlphaBeta0        ;// r6/r7 were used as P_0/Q_0

        SUB     pQ0, pQ0, srcdstStep, LSL #1    ;// back to the p1 row

        ADDS    XY, XY, XY                      ;// C: end of edge; Z: all edges done
        M_STR   XY, pXYBS
        BCC     LoopX

;//-------- Common Exit of LoopY -----------------
        ;// Align the pointers
        ;// None of the instructions before BNE writes the flags, so
        ;// BNE still sees Z from the last ADDS XY, XY, XY.

ExitLoopY
        ADD     pBS, pBS, #4                    ;// skip four bS bytes to reach the next edge
        M_LDRD  alpha, beta, pAlphaBeta1        ;// switch to the second-edge alpha/beta
        SUB     pQ0, pQ0, #8                    ;// undo the two 4-byte advances
        ADD     pQ0, pQ0, srcdstStep, LSL #2    ;// next edge is four rows down
        M_STRD  alpha, beta, pAlphaBeta0        ;// make it the pair reloaded each pass

        BNE     LoopY
        MOV     r0, #OMX_Sts_NoErr

;//-----------------End Filter--------------------
        M_END

        ENDIF

        END