omxVCM4P10_FilterDeblockingLuma_HorEdge_I_s.s revision 78e52bfac041d71ce53b5b13c2abf78af742b09d
;//
;// Copyright (C) 2007-2008 ARM Limited
;//
;// Licensed under the Apache License, Version 2.0 (the "License");
;// you may not use this file except in compliance with the License.
;// You may obtain a copy of the License at
;//
;//      http://www.apache.org/licenses/LICENSE-2.0
;//
;// Unless required by applicable law or agreed to in writing, software
;// distributed under the License is distributed on an "AS IS" BASIS,
;// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
;// See the License for the specific language governing permissions and
;// limitations under the License.
;//
;//
;//
;// File Name:  omxVCM4P10_FilterDeblockingLuma_HorEdge_I_s.s
;// OpenMAX DL: v1.0.2
;// Revision:   9641
;// Date:       Thursday, February 7, 2008
;//
;//
;//
;//

;//-----------------------------------------------------------------------
;// omxVCM4P10_FilterDeblockingLuma_HorEdge_I
;//
;// Syntax:  ARM armasm, ARMv6 SIMD (USUB8 / SEL), built only when the
;//          ARM1136JS variant is selected.
;//
;// In-place deblocking of luma pixels across horizontal edges.  The
;// routine walks a 4 x 4 grid of groups (LoopY x LoopX).  Each group is
;// 4 pixels wide (one 32-bit word per row) and 8 rows tall: rows
;// p3,p2,p1,p0 above the edge and q0,q1,q2,q3 below it.  After every four
;// groups the pointer moves back 16 bytes and down 4 rows.
;//
;// In:    r0        = pSrcDst     pointer to the q0 row of the first edge
;//                                (p3 is read from pSrcDst - 4*srcdstStep)
;//        r1        = srcdstStep  byte offset between consecutive rows
;//        r2        = pAlphaArg   2 bytes: [0] used for the first edge,
;//                                [1] for the remaining three
;//        r3        = pBetaArg    2 bytes, same layout as pAlphaArg
;//        [stack]   = pThresholds one byte consumed per group (16 total)
;//        [stack]   = pBS         one byte consumed per group (16 total)
;// Out:   r0        = OMX_Sts_NoErr (no argument checking is done here)
;//
;// Per group:  bS == 0          -> pixels left untouched
;//             0 < bS < 4       -> armVCM4P10_DeblockingLumabSLT4_unsafe
;//             bS >= 4          -> armVCM4P10_DeblockingLumabSGE4_unsafe
;//             filt == 0        -> pixels left untouched
;//
;// NOTE(review): rows are accessed with word M_LDR/M_STR; presumably
;// pSrcDst and srcdstStep must be 4-byte aligned - confirm with the
;// OpenMAX DL specification.
;// NOTE(review): the register interface of the two *_unsafe kernels is
;// not visible in this file; the "bSLT4"/"bSGE4" alias groups below
;// presumably mirror it - verify against the kernel sources.
;//-----------------------------------------------------------------------

        INCLUDE omxtypes_s.h
        INCLUDE armCOMM_s.h

        M_VARIANTS ARM1136JS

        IMPORT  armVCM4P10_DeblockingLumabSLT4_unsafe
        IMPORT  armVCM4P10_DeblockingLumabSGE4_unsafe



    IF ARM1136JS


MASK_0      EQU 0x00000000
MASK_1      EQU 0x01010101      ;// byte replicator (x * MASK_1) and "flag set" pattern
MASK_2      EQU 0xff00ff00
LOOP_COUNT  EQU 0x11110000      ;// shift-left counter: carry every 4th shift, zero after 16

;// Declare input registers

pSrcDst     RN 0
srcdstStep  RN 1
pAlphaArg   RN 2
pBetaArg    RN 3

pThresholds RN 14
pBS         RN 9
pQ0         RN 0
bS          RN 2

alpha       RN 6
alpha0      RN 6
alpha1      RN 8

beta        RN 7
beta0       RN 7
beta1       RN 9

;// Declare Local/Temporary variables

;// Pixels
p_0         RN 3
p_1         RN 5
p_2         RN 4
p_3         RN 2
q_0         RN 8
q_1         RN 9
q_2         RN 10
q_3         RN 12

;// Filtering

dp0q0       RN 12
dp1p0       RN 12
dq1q0       RN 12
dp2p0       RN 12
dq2q0       RN 12

ap0q0       RN 1
filt        RN 2

m00         RN 14
m01         RN 11

apflg       RN 0
aqflg       RN 6
apqflg      RN 0


;//Declarations for bSLT4 kernel

tC0         RN 7
ptC0        RN 1

pQ0a        RN 0
Stepa       RN 1
maska       RN 14

P0a         RN 1
P1a         RN 8
Q0a         RN 7
Q1a         RN 11

;//Declarations for bSGE4 kernel

pQ0b        RN 0
Stepb       RN 1
maskb       RN 14

P0b         RN 6
P1b         RN 7
P2b         RN 1
P3b         RN 3

Q0b         RN 9
Q1b         RN 0
Q2b         RN 2
Q3b         RN 3

;// Miscellaneous
XY          RN 8
t0          RN 3
t1          RN 12
t2          RN 14
t7          RN 7
t4          RN 4
t5          RN 1
t8          RN 6
a           RN 0




        ;// Allocate stack memory
        ;// NOTE(review): pXYBS/ppBS and ppQ0Step/pStep are each declared as two
        ;// 4-byte slots but are also accessed as one 8-byte pair through
        ;// M_STRD/M_LDRD on the first name.  This presumably relies on the
        ;// M_ALLOC macros placing the second slot directly after the first -
        ;// confirm against armCOMM_s.h before reordering these lines.
        M_ALLOC4 ppThresholds,4
        M_ALLOC4 pQ_3,4
        M_ALLOC4 pP_3,4
        M_ALLOC8 pAlphaBeta0,8
        M_ALLOC8 pAlphaBeta1,8
        M_ALLOC8 pXYBS,4
        M_ALLOC4 ppBS,4
        M_ALLOC8 ppQ0Step,4
        M_ALLOC4 pStep,4

        ;// Function header
        M_START omxVCM4P10_FilterDeblockingLuma_HorEdge_I, r11

        ;//Input arguments on the stack
        M_ARG   ppThresholdsArg, 4
        M_ARG   ppBSArg, 4

        LDR     t4,=MASK_1

        LDRB    alpha0, [pAlphaArg]
        LDRB    beta0,  [pBetaArg]
        LDRB    alpha1, [pAlphaArg,#1]
        LDRB    beta1,  [pBetaArg,#1]

        ;// Replicate each 8-bit threshold into all four byte lanes
        MUL     alpha0, alpha0, t4
        MUL     beta0, beta0, t4
        MUL     alpha1, alpha1, t4
        MUL     beta1, beta1, t4

        M_STRD  alpha0, beta0, pAlphaBeta0
        M_STRD  alpha1, beta1, pAlphaBeta1

        LDR     XY,=LOOP_COUNT
        M_LDR   pBS, ppBSArg
        M_LDR   pThresholds, ppThresholdsArg
        M_STR   srcdstStep, pStep
        M_STRD  XY, pBS, pXYBS
        SUB     pQ0, pQ0, srcdstStep, LSL #2    ;// back up 4 rows: pQ0 -> p3 row
        M_STR   pThresholds, ppThresholds
LoopY
LoopX
;//---------------Load Pixels-------------------
        M_STR   pQ0, ppQ0Step                   ;// remember p3-row address of this group
        M_LDR   p_3, [pQ0], srcdstStep
        M_LDR   p_2, [pQ0], srcdstStep
        M_STR   p_3, pP_3
        LDRB    bS, [pBS], #1
        M_STR   pBS, ppBS
        M_LDR   p_1, [pQ0], srcdstStep
        CMP     bS, #0
        M_LDR   p_0, [pQ0], srcdstStep
        M_LDR   q_0, [pQ0], srcdstStep
        M_LDR   q_1, [pQ0], srcdstStep
        M_LDR   q_2, [pQ0], srcdstStep
        M_LDR   q_3, [pQ0], srcdstStep
        BEQ     NoFilterBS0
        ;// Flags from this compare are consumed by "BLT bSLT4" below; nothing in
        ;// between writes N/Z/C/V (USUB8 only updates the GE flags read by SEL).
        CMP     bS, #4
        M_STR   q_3, pQ_3

;//--------------Filtering Decision -------------------
        LDR     m01, =MASK_1                ;//  01010101 mask
        MOV     m00, #MASK_0                ;//  00000000 mask

        ;// Check |p0-q0|<Alpha
        USUB8   dp0q0, p_0, q_0
        USUB8   a, q_0, p_0
        SEL     ap0q0, a, dp0q0             ;// per-byte |p0-q0|
        USUB8   a, ap0q0, alpha
        SEL     filt, m00, m01              ;// lane = 0x01 where |p0-q0| < alpha, else 0

        ;// Check |p1-p0|<Beta
        USUB8   dp1p0, p_1, p_0
        USUB8   a, p_0, p_1
        SEL     a, a, dp1p0
        USUB8   a, a, beta
        SEL     filt, m00, filt

        ;// Check |q1-q0|<Beta
        USUB8   dq1q0, q_1, q_0
        USUB8   a, q_0, q_1
        SEL     a, a, dq1q0
        USUB8   a, a, beta
        SEL     filt, m00, filt

        ;// Check ap<Beta
        USUB8   dp2p0, p_2, p_0
        USUB8   a, p_0, p_2
        SEL     a, a, dp2p0
        USUB8   a, a, beta
        SEL     apflg, m00, filt            ;// apflg = filt && (ap<beta)

        ;// Check aq<Beta
        ;// The GE flags left by the last USUB8 are consumed by the first SEL
        ;// of whichever branch (bSGE4 / bSLT4) is taken.
        USUB8   dq2q0, q_2, q_0
        USUB8   t2, q_0, q_2
        SEL     t2, t2, dq2q0
        USUB8   t2, t2, beta
        MOV     t7,#0

        BLT     bSLT4                       ;// bS < 4 (from CMP bS, #4 above)
;//-------------------Filter--------------------
bSGE4
        ;//---------bSGE4 Execution---------------
        SEL     t1, t7, filt                ;// t1 = filt && (aq<beta)
        CMP     filt, #0
        ORR     apqflg, apflg, t1, LSL #1   ;// bit0 of each lane = ap flag, bit1 = aq flag
        M_LDRD  pQ0, srcdstStep, ppQ0Step, EQ   ;// nothing to filter: restore pQ0/step
        BEQ     NoFilterFilt0

        BL      armVCM4P10_DeblockingLumabSGE4_unsafe

        ;//---------Store result---------------
        M_LDR   pThresholds, ppThresholds
        MOV     p_2, Q1b                    ;// Q1b/P2b live in r0/r1, which the
        MOV     p_1, P2b                    ;// M_LDRD below overwrites
        M_LDRD  pQ0b, Stepb, ppQ0Step
        ADD     pThresholds, #1             ;// threshold byte is skipped, not used, for bS >= 4
        M_STR   pThresholds, ppThresholds
        M_STR   p_1, [pQ0b, Stepb]!         ;// p2 row
        M_STR   P1b, [pQ0b, Stepb]!         ;// p1 row
        M_STR   P0b, [pQ0b, Stepb]!         ;// p0 row
        M_STR   Q0b, [pQ0b, Stepb]!         ;// q0 row
        STR     p_2, [pQ0b, Stepb]          ;// q1 row
        STR     Q2b, [pQ0b, Stepb, LSL #1]  ;// q2 row


        M_LDRD  XY, pBS, pXYBS
        SUB     pQ0, pQ0b, Stepb, LSL #2    ;// back to the p3 row ...
        ADD     pQ0, pQ0, #4                ;// ... of the next 4-pixel group
        M_LDRD  alpha, beta, pAlphaBeta0
        ADDS    XY, XY, XY                  ;// C set after every 4th group, Z after the 16th
        M_STR   XY, pXYBS
        BCC     LoopX
        B       ExitLoopY

;//---------- Exit of LoopX --------------
;//---- for the case of no filtering -----

NoFilterBS0
        SUB     pQ0, pQ0, srcdstStep, LSL #3    ;// undo the 8 post-incremented row loads
NoFilterFilt0
        ADD     pQ0, pQ0, #4
        ;// Load counter for LoopX
        M_LDRD  XY, pBS, pXYBS
        M_LDR   pThresholds, ppThresholds
        M_LDRD  alpha, beta, pAlphaBeta0

        ;// Align the pointers
        ADDS    XY, XY, XY
        ADD     pThresholds, pThresholds, #1
        M_STR   pThresholds, ppThresholds
        M_STR   XY, pXYBS
        BCC     LoopX
        B       ExitLoopY

bSLT4
        ;//---------bSLT4 Execution---------------
        SEL     aqflg, t7, filt             ;// aqflg = filt && (aq<beta)
        M_LDR   ptC0, ppThresholds
        CMP     filt, #0
        M_LDRD  pQ0, srcdstStep, ppQ0Step, EQ   ;// nothing to filter: restore pQ0/step
        BEQ     NoFilterFilt0

        LDRB    tC0, [ptC0], #1
        M_STR   ptC0, ppThresholds

        BL      armVCM4P10_DeblockingLumabSLT4_unsafe

        ;//---------Store result---------------
        MOV     p_2, P0a                    ;// P0a lives in r1, overwritten by M_LDRD below
        M_LDRD  pQ0a, Stepa, ppQ0Step
        M_STR   P1a, [pQ0a, Stepa, LSL #1]! ;// p1 row
        M_STR   p_2, [pQ0a, Stepa]!         ;// p0 row
        M_STR   Q0a, [pQ0a, Stepa]!         ;// q0 row
        STR     Q1a, [pQ0a, Stepa]          ;// q1 row

        ;// Load counter
        M_LDRD  XY, pBS, pXYBS
        M_LDRD  alpha, beta, pAlphaBeta0

        SUB     pQ0, pQ0a, Stepa, LSL #2
        ADD     pQ0, pQ0, #4

        ADDS    XY, XY, XY
        M_STR   XY, pXYBS
        BCC     LoopX

;//-------- Common Exit of LoopY -----------------
        ;// Align the pointers
        ;// Z from the most recent "ADDS XY, XY, XY" is still live here: none of
        ;// the instructions between it and BNE below update the flags.
ExitLoopY
        M_LDRD  alpha, beta, pAlphaBeta1    ;// edges after the first use alpha1/beta1
        SUB     pQ0, pQ0, #16               ;// back to the left of the row ...
        ADD     pQ0, pQ0, srcdstStep, LSL #2    ;// ... and 4 rows down to the next edge
        M_STRD  alpha, beta, pAlphaBeta0

        BNE     LoopY
        MOV     r0, #OMX_Sts_NoErr
;//-----------------End Filter--------------------
        M_END

    ENDIF


        END