;//
;//
;// File Name:  omxVCM4P10_FilterDeblockingLuma_HorEdge_I_s.s
;// OpenMAX DL: v1.0.2
;// Revision:   9641
;// Date:       Thursday, February 7, 2008
;//
;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
;//
;//
;//
;// Description:
;//   In-place luma deblocking across horizontal edges for the ARM1136JS
;//   variant, built on the byte-lane SIMD instructions USUB8 and SEL.
;//
;//   Syntax: ARM armasm. Macros (M_START, M_ALLOCn, M_ARG, M_LDR, M_STR,
;//   M_LDRD, M_STRD, M_END) come from armCOMM_s.h.
;//
;//   Work is organised as an outer loop (LoopY) of four passes, each made of
;//   an inner loop (LoopX) of four steps. One inner step handles a 4-pixel
;//   wide column of eight rows (p3..p0 above the edge, q0..q3 below) and
;//   consumes one bS byte and one threshold byte. Between inner steps the
;//   pixel pointer advances 4 bytes; between outer passes it moves back 16
;//   bytes and down 4 rows.
;//
;//   The first outer pass uses pAlpha[0]/pBeta[0]; every later pass uses
;//   pAlpha[1]/pBeta[1].
;//
;// Inputs:
;//   r0        pSrcDst      pointer to the q0 row at the first edge
;//   r1        srcdstStep   byte distance between rows
;//   r2        pAlpha       two alpha bytes
;//   r3        pBeta        two beta bytes
;//   [stack]   pThresholds  threshold bytes, one consumed per inner step
;//   [stack]   pBS          boundary-strength bytes, one consumed per inner step
;//
;// Output:
;//   r0 = OMX_Sts_NoErr (the only value this routine returns)
;//

        INCLUDE omxtypes_s.h
        INCLUDE armCOMM_s.h

        M_VARIANTS ARM1136JS

        IMPORT  armVCM4P10_DeblockingLumabSLT4_unsafe
        IMPORT  armVCM4P10_DeblockingLumabSGE4_unsafe



    IF ARM1136JS


;// Byte-lane masks.
;// MASK_2 is not referenced anywhere in this file.
MASK_0      EQU 0x00000000
MASK_1      EQU 0x01010101
MASK_2      EQU 0xff00ff00

;// Combined loop counter. It is doubled (ADDS) once per inner step:
;//   - a carry out appears on every 4th doubling  -> end of LoopX
;//   - the value reaches zero on the 16th doubling -> end of LoopY
LOOP_COUNT  EQU 0x11110000

;// ---------------------------------------------------------------------
;// Register aliases.
;// Many aliases share one physical register; each is only valid in the
;// phase of the routine where it is used, so check the RN number before
;// moving any instruction.
;// ---------------------------------------------------------------------

;// Incoming register arguments
pSrcDst     RN 0
srcdstStep  RN 1
pAlphaArg   RN 2
pBetaArg    RN 3

;// Pointers and per-step scalars
pThresholds RN 14
pBS         RN 9
pQ0         RN 0
bS          RN 2

;// Alpha: lane-replicated word
alpha       RN 6
alpha0      RN 6
alpha1      RN 8

;// Beta: lane-replicated word
beta        RN 7
beta0       RN 7
beta1       RN 9

;// Pixel rows, four bytes each
p_0         RN 3
p_1         RN 5
p_2         RN 4
p_3         RN 2
q_0         RN 8
q_1         RN 9
q_2         RN 10
q_3         RN 12

;// Filter-decision temporaries
dp0q0       RN 12
dp1p0       RN 12
dq1q0       RN 12
dp2p0       RN 12
dq2q0       RN 12

ap0q0       RN 1
filt        RN 2

m00         RN 14
m01         RN 11

apflg       RN 0
aqflg       RN 6
apqflg      RN 0

;// Registers around the bS below 4 kernel call
tC0         RN 7
ptC0        RN 1

pQ0a        RN 0
Stepa       RN 1
maska       RN 14

P0a         RN 1
P1a         RN 8
Q0a         RN 7
Q1a         RN 11

;// Registers around the bS at least 4 kernel call
pQ0b        RN 0
Stepb       RN 1
maskb       RN 14

P0b         RN 6
P1b         RN 7
P2b         RN 1
P3b         RN 3

Q0b         RN 9
Q1b         RN 0
Q2b         RN 2
Q3b         RN 3

;// Miscellaneous
XY          RN 8
t0          RN 3
t1          RN 12
t2          RN 14
t7          RN 7
t4          RN 4
t5          RN 1
t8          RN 6
a           RN 0




        ;// Stack locals. Keep the declaration order exactly as written.
        ;//  - pXYBS is read/written as a register pair (XY, pBS) while pBS
        ;//    alone is saved through ppBS; likewise ppQ0Step is read as the
        ;//    pair (pQ0, srcdstStep) while the step is saved through pStep.
        ;//    NOTE(review): this presumably relies on each 4-byte slot being
        ;//    placed directly after its 8-aligned partner by the M_ALLOC
        ;//    macros - confirm in armCOMM_s.h.
        ;//  - pP_3 and pQ_3 are written but never read in this file.
        ;//    NOTE(review): presumably the imported kernels read them from
        ;//    this frame - confirm against the kernel source.
        M_ALLOC4 ppThresholds,4
        M_ALLOC4 pQ_3,4
        M_ALLOC4 pP_3,4
        M_ALLOC8 pAlphaBeta0,8
        M_ALLOC8 pAlphaBeta1,8
        M_ALLOC8 pXYBS,4
        M_ALLOC4 ppBS,4
        M_ALLOC8 ppQ0Step,4
        M_ALLOC4 pStep,4

        ;// Function entry
        M_START omxVCM4P10_FilterDeblockingLuma_HorEdge_I, r11

        ;// Fifth and sixth arguments arrive on the stack
        M_ARG   ppThresholdsArg, 4
        M_ARG   ppBSArg, 4

;//--------------- One-time setup ----------------

        LDR     t4,=MASK_1

        LDRB    alpha0, [pAlphaArg]
        LDRB    beta0, [pBetaArg]
        LDRB    alpha1, [pAlphaArg,#1]
        LDRB    beta1, [pBetaArg,#1]

        ;// byte * 0x01010101 copies the byte into all four lanes
        MUL     alpha0, alpha0, t4
        MUL     beta0, beta0, t4
        MUL     alpha1, alpha1, t4
        MUL     beta1, beta1, t4

        ;// Slot 0 holds the pair in use, slot 1 the pair for later passes
        M_STRD  alpha0, beta0, pAlphaBeta0
        M_STRD  alpha1, beta1, pAlphaBeta1

        LDR     XY,=LOOP_COUNT
        M_LDR   pBS, ppBSArg
        M_LDR   pThresholds, ppThresholdsArg
        M_STR   srcdstStep, pStep
        M_STRD  XY, pBS, pXYBS
        SUB     pQ0, pQ0, srcdstStep, LSL #2    ;// back up four rows, to the p3 row
        M_STR   pThresholds, ppThresholds
LoopY
LoopX
;//--------------- Fetch eight rows ----------------
        ;// Remember the p3-row address; r0 and r1 are both reused below
        M_STR   pQ0, ppQ0Step
        M_LDR   p_3, [pQ0], srcdstStep
        M_LDR   p_2, [pQ0], srcdstStep
        ;// p_3 shares r2 with bS, so it is saved before bS is loaded
        M_STR   p_3, pP_3
        LDRB    bS, [pBS], #1
        ;// pBS shares r9 with q_1, so the advanced pointer goes to memory
        M_STR   pBS, ppBS
        M_LDR   p_1, [pQ0], srcdstStep
        CMP     bS, #0
        M_LDR   p_0, [pQ0], srcdstStep
        M_LDR   q_0, [pQ0], srcdstStep
        M_LDR   q_1, [pQ0], srcdstStep
        M_LDR   q_2, [pQ0], srcdstStep
        M_LDR   q_3, [pQ0], srcdstStep
        BEQ     NoFilterBS0                     ;// bS == 0: leave this column untouched
        ;// bS is about to be overwritten (filt is also r2), so compare now.
        ;// Nothing before the BLT at the end of the decision stage changes
        ;// N/Z/C/V: USUB8 writes only the GE bits.
        CMP     bS, #4
        M_STR   q_3, pQ_3

;//--------------- Per-lane filter decision ----------------
        LDR     m01, =MASK_1                    ;// 0x01 in every lane
        MOV     m00, #MASK_0                    ;// 0x00 in every lane

        ;// Each test below forms a per-lane absolute difference with two
        ;// USUB8 and a SEL, then subtracts the limit: GE is set in a lane
        ;// when the difference is at least the limit, and the final SEL
        ;// writes 0 into such lanes.

        ;// filt lane = 1 where |p0 - q0| is below alpha, else 0
        USUB8   dp0q0, p_0, q_0
        USUB8   a, q_0, p_0
        SEL     ap0q0, a, dp0q0
        USUB8   a, ap0q0, alpha
        SEL     filt, m00, m01

        ;// clear filt where |p1 - p0| is not below beta
        USUB8   dp1p0, p_1, p_0
        USUB8   a, p_0, p_1
        SEL     a, a, dp1p0
        USUB8   a, a, beta
        SEL     filt, m00, filt

        ;// clear filt where |q1 - q0| is not below beta
        USUB8   dq1q0, q_1, q_0
        USUB8   a, q_0, q_1
        SEL     a, a, dq1q0
        USUB8   a, a, beta
        SEL     filt, m00, filt

        ;// apflg lane = filt where |p2 - p0| is below beta, else 0
        USUB8   dp2p0, p_2, p_0
        USUB8   a, p_0, p_2
        SEL     a, a, dp2p0
        USUB8   a, a, beta
        SEL     apflg, m00, filt

        ;// |q2 - q0| against beta: only the GE bits are produced here; the
        ;// SEL that consumes them is the first instruction of each branch.
        ;// t2 reuses r14, which destroys m00, so a new zero is built in t7
        ;// (r7 - beta is no longer needed in this step).
        USUB8   dq2q0, q_2, q_0
        USUB8   t2, q_0, q_2
        SEL     t2, t2, dq2q0
        USUB8   t2, t2, beta
        MOV     t7,#0


        BLT     bSLT4                           ;// uses the flags of CMP bS, #4
;//--------------- bS at least 4 ----------------
bSGE4
        SEL     t1, t7, filt                    ;// t1 lane = filt where |q2 - q0| is below beta
        CMP     filt, #0
        ORR     apqflg, apflg, t1, LSL #1       ;// per lane: bit0 = p-side flag, bit1 = q-side flag
        ;// No lane passed the tests: restore pointer and step, skip the kernel
        M_LDRD  pQ0, srcdstStep, ppQ0Step, EQ
        BEQ     NoFilterFilt0

        BL      armVCM4P10_DeblockingLumabSGE4_unsafe

        ;// Write back p2, p1, p0, q0, q1, q2
        ;// r14 was overwritten by the temporaries above and by BL: reload it
        M_LDR   pThresholds, ppThresholds
        MOV     p_2, Q1b                        ;// Q1b is r0 and P2b is r1: move both aside
        MOV     p_1, P2b                        ;// before r0/r1 get the pointer and the step
        M_LDRD  pQ0b, Stepb, ppQ0Step
        ADD     pThresholds, pThresholds, #1    ;// one threshold byte per step, unused on this path
        M_STR   pThresholds, ppThresholds
        ;// Pre-indexed stores with writeback: p2 row, p1 row, p0 row, q0 row
        M_STR   p_1, [pQ0b, Stepb]!
        M_STR   P1b, [pQ0b, Stepb]!
        M_STR   P0b, [pQ0b, Stepb]!
        M_STR   Q0b, [pQ0b, Stepb]!
        STR     p_2, [pQ0b, Stepb]              ;// q1 row
        STR     Q2b, [pQ0b, Stepb, LSL #1]      ;// q2 row


        ;// Advance to the next column and update the loop counter
        M_LDRD  XY, pBS, pXYBS
        SUB     pQ0, pQ0b, Stepb, LSL #2        ;// q0 row back to the p3 row
        ADD     pQ0, pQ0, #4                    ;// next four pixels
        M_LDRD  alpha, beta, pAlphaBeta0
        ADDS    XY, XY, XY
        M_STR   XY, pXYBS
        BCC     LoopX
        B       ExitLoopY

;//--------------- Nothing to filter in this column ----------------

NoFilterBS0
        ;// All eight post-indexed loads were done: rewind eight rows
        SUB     pQ0, pQ0, srcdstStep, LSL #3
NoFilterFilt0
        ;// pQ0 is at the p3 row here on every path
        ADD     pQ0, pQ0, #4
        M_LDRD  XY, pBS, pXYBS
        M_LDR   pThresholds, ppThresholds
        M_LDRD  alpha, beta, pAlphaBeta0

        ADDS    XY, XY, XY
        ADD     pThresholds, pThresholds, #1    ;// skip this step's threshold byte
        M_STR   pThresholds, ppThresholds
        M_STR   XY, pXYBS
        BCC     LoopX
        B       ExitLoopY

;//--------------- bS below 4 ----------------
bSLT4
        SEL     aqflg, t7, filt                 ;// aqflg lane = filt where |q2 - q0| is below beta
        M_LDR   ptC0, ppThresholds
        CMP     filt, #0
        ;// No lane passed the tests: restore pointer and step, skip the kernel
        ;// (this also replaces ptC0, which shares r1 with the step)
        M_LDRD  pQ0, srcdstStep, ppQ0Step, EQ
        BEQ     NoFilterFilt0

        LDRB    tC0, [ptC0], #1                 ;// this step's threshold byte
        M_STR   ptC0, ppThresholds

        BL      armVCM4P10_DeblockingLumabSLT4_unsafe

        ;// Write back p1, p0, q0, q1
        MOV     p_2, P0a                        ;// P0a is r1: move aside before r1 gets the step
        M_LDRD  pQ0a, Stepa, ppQ0Step
        ;// Pre-indexed stores with writeback: p1 row, p0 row, q0 row
        M_STR   P1a, [pQ0a, Stepa, LSL #1]!
        M_STR   p_2, [pQ0a, Stepa]!
        M_STR   Q0a, [pQ0a, Stepa]!
        STR     Q1a, [pQ0a, Stepa]              ;// q1 row

        ;// Advance to the next column and update the loop counter
        M_LDRD  XY, pBS, pXYBS
        M_LDRD  alpha, beta, pAlphaBeta0

        SUB     pQ0, pQ0a, Stepa, LSL #2        ;// q0 row back to the p3 row
        ADD     pQ0, pQ0, #4                    ;// next four pixels

        ADDS    XY, XY, XY
        M_STR   XY, pXYBS
        BCC     LoopX

;//--------------- End of one outer pass ----------------
ExitLoopY
        ;// From here on the second alpha/beta pair is the active one
        M_LDRD  alpha, beta, pAlphaBeta1
        SUB     pQ0, pQ0, #16                   ;// undo the four 4-byte column steps
        ADD     pQ0, pQ0, srcdstStep, LSL #2    ;// down four rows
        M_STRD  alpha, beta, pAlphaBeta0

        ;// Z comes from the last ADDS XY, XY, XY; the SUB/ADD above do not
        ;// set flags. NOTE(review): this assumes the M_LDRD/M_STRD macros
        ;// leave the flags alone - confirm in armCOMM_s.h.
        BNE     LoopY
        MOV     r0, #OMX_Sts_NoErr
;//--------------- Return ----------------
        M_END

    ENDIF


        END
