;//
;// Copyright (C) 2007-2008 ARM Limited
;//
;// Licensed under the Apache License, Version 2.0 (the "License");
;// you may not use this file except in compliance with the License.
;// You may obtain a copy of the License at
;//
;//      http://www.apache.org/licenses/LICENSE-2.0
;//
;// Unless required by applicable law or agreed to in writing, software
;// distributed under the License is distributed on an "AS IS" BASIS,
;// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
;// See the License for the specific language governing permissions and
;// limitations under the License.
;//
;//
;//
;// File Name:  omxVCM4P10_FilterDeblockingLuma_HorEdge_I_s.s
;// OpenMAX DL: v1.0.2
;// Revision:   12290
;// Date:       Wednesday, April 9, 2008
;//
;//
;//
;//

        INCLUDE omxtypes_s.h
        INCLUDE armCOMM_s.h

        M_VARIANTS CortexA8

        ;// Filter kernels implemented in other files; called with BL.
        IMPORT  armVCM4P10_DeblockingLumabSLT4_unsafe
        IMPORT  armVCM4P10_DeblockingLumabSGE4_unsafe

        IF CortexA8

;// Initial value of the XY loop counter.  The loops double XY with ADDS
;// and branch on the resulting carry / zero flags, so the bit pattern
;// itself encodes the iteration counts.
LOOP_COUNT      EQU     0x55000000


;// Function arguments

pSrcDst         RN 0
srcdstStep      RN 1
pAlpha          RN 2
pBeta           RN 3

pThresholds     RN 5
pBS             RN 4
bS10            RN 12

;// pAlpha_0 / pBeta_0 are the incoming r2 / r3 under a second name.
pAlpha_0        RN 2
pBeta_0         RN 3

pAlpha_1        RN 7
pBeta_1         RN 8



;// Loop

XY              RN 9

pTmp            RN 6
step            RN 10

;// Pixels
dP_0            DN D4.U8
dP_1            DN D5.U8
dP_2            DN D6.U8
dP_3            DN D7.U8
dQ_0            DN D8.U8
dQ_1            DN D9.U8
dQ_2            DN D10.U8
dQ_3            DN D11.U8


;// Filtering Decision
dAlpha          DN D0.U8
dBeta           DN D2.U8

;// Several names below share one D register (dAqflg/dAp1p0 = D12,
;// dApflg/dAq2q0 = D17), so code using them is order-sensitive.
dFilt           DN D16.U8
dAqflg          DN D12.U8
dApflg          DN D17.U8

dAp0q0          DN D13.U8
dAp1p0          DN D12.U8
dAq1q0          DN D18.U8
dAp2p0          DN D19.U8
dAq2q0          DN D17.U8

;// bSLT4
dTC0            DN D18.U8
dTC1            DN D19.U8
dTC01           DN D18.U8

;// dTCs and dTC are signed / unsigned views of the same register, D31.
dTCs            DN D31.S8
dTC             DN D31.U8

dMask_0         DN D14.U8
dMask_1         DN D15.U8

Mask_0          RN 11

dTemp           DN D19.U8

;// Computing P0,Q0
qDq0p0          QN Q10.S16
qDp1q1          QN Q11.S16
qDelta          QN Q10.S16                  ;// reuse qDq0p0
dDelta          DN D20.S8


;// Computing P1,Q1
dRp0q0          DN D24.U8

dMaxP           DN D23.U8
dMinP           DN D22.U8

dMaxQ           DN D19.U8
dMinQ           DN D21.U8

dDeltaP         DN D26.U8
dDeltaQ         DN D27.U8

qP_0n           QN Q14.S16
qQ_0n           QN Q12.S16

dQ_0n           DN D24.U8
dQ_1n           DN D25.U8
dP_0n           DN D29.U8
dP_1n           DN D30.U8

;// bSGE4

qSp0q0          QN Q10.U16

qSp2q1          QN Q11.U16
qSp0q0p1        QN Q12.U16
qSp3p2          QN Q13.U16
dHSp0q1         DN D28.U8

;// The Q-side names reuse the registers of the P-side names above.
qSq2p1          QN Q11.U16
qSp0q0q1        QN Q12.U16
qSq3q2          QN Q13.U16                  ;// same register as qSp3p2
dHSq0p1         DN D28.U8                   ;// same register as dHSp0q1

qTemp1          QN Q11.U16                  ;// same register as qSp2q1
qTemp2          QN Q12.U16                  ;// same register as qSp0q0p1

dP_0t           DN D28.U8                   ;// same register as dHSp0q1
dQ_0t           DN D22.U8                   ;// low half of qTemp1 (Q11)

;// dP_0n / dP_1n / dQ_0n / dQ_1n repeat the definitions given in the
;// "Computing P1,Q1" group with the same registers.
dP_0n           DN D29.U8
dP_1n           DN D30.U8
dP_2n           DN D31.U8

dQ_0n           DN D24.U8                   ;// low half of qTemp2 (Q12)
dQ_1n           DN D25.U8                   ;// high half of qTemp2 (Q12)
dQ_2n           DN D28.U8                   ;// same register as dP_0t / dHSp0q1


        ;//--------------------------------------------------------------
        ;// omxVCM4P10_FilterDeblockingLuma_HorEdge_I
        ;//
        ;// In:   r0 = pSrcDst, r1 = srcdstStep, r2 = pAlpha, r3 = pBeta
        ;//       stack: pThresholds, pBS (declared with M_ARG below)
        ;// Out:  r0 = OMX_Sts_NoErr
        ;//
        ;// Structure, as implemented below:
        ;//  * pSrcDst is first moved back 4 rows; each LoopX iteration
        ;//    then loads 8 rows (p3..p0, q0..q3) of 8 bytes.
        ;//  * Each LoopX iteration consumes one halfword from pBS.  The
        ;//    low byte gates bytes 0-3 of dFilt, the high byte gates
        ;//    bytes 4-7.  A zero halfword skips the 8 columns entirely.
        ;//  * XY starts as LOOP_COUNT (0x55000000) and is doubled once
        ;//    per LoopX iteration.  Carry is set on every second
        ;//    doubling (end of LoopX) and the result is zero on the
        ;//    eighth (end of LoopY): 2 LoopX iterations per LoopY pass,
        ;//    4 LoopY passes.
        ;//  * The first LoopY pass uses pAlpha[0] / pBeta[0]; every
        ;//    later pass uses pAlpha[1] / pBeta[1].
        ;//
        ;// The instruction order is kept exactly as in the original
        ;// source: flags set by TST / ADDS are consumed several
        ;// instructions later, and several D-register aliases share
        ;// storage.
        ;//--------------------------------------------------------------

        ;// Function header
        ;// NOTE(review): "r11, d15" presumably asks the macro to preserve
        ;// registers up to r11 / d15 - confirm in armCOMM_s.h.
        M_START omxVCM4P10_FilterDeblockingLuma_HorEdge_I, r11, d15

        ;//Arguments on the stack
        M_ARG   ppThresholds, 4
        M_ARG   ppBS, 4

        ;// d0-dAlpha_0
        ;// d2-dBeta_0

        ADD         pAlpha_1, pAlpha_0, #1      ;// address of pAlpha[1]
        ADD         pBeta_1, pBeta_0, #1        ;// address of pBeta[1]

        VLD1        {dAlpha[]}, [pAlpha_0]      ;// replicate pAlpha[0] to all lanes
        SUB         pSrcDst, pSrcDst, srcdstStep, LSL #2    ;// back 4 rows
        VLD1        {dBeta[]}, [pBeta_0]        ;// replicate pBeta[0] to all lanes

        M_LDR       pBS, ppBS
        M_LDR       pThresholds, ppThresholds

        MOV         Mask_0,#0                   ;// zero source for the VMOVEQ lane clears

        ;// dMask_0-14
        ;// dMask_1-15
        ;// Not read again in this file.
        ;// NOTE(review): presumably consumed by the imported filter
        ;// helpers - confirm.

        VMOV        dMask_0, #0
        VMOV        dMask_1, #1

        ADD         step, srcdstStep, srcdstStep    ;// step = 2 * srcdstStep

        LDR         XY,=LOOP_COUNT

        ;// p0-p3 - d4-d7
        ;// q0-q3 - d8-d11
LoopY
LoopX
        LDRH        bS10, [pBS], #2             ;// two bS bytes for these 8 columns
        ADD         pTmp, pSrcDst, srcdstStep   ;// pTmp walks the odd rows
        CMP         bS10, #0
        BEQ         NoFilterBS0

        ;// pSrcDst and pTmp each advance two rows per load, so the rows
        ;// are fetched alternately through the two pointers.
        VLD1        dP_3, [pSrcDst], step
        VLD1        dP_2, [pTmp], step
        VLD1        dP_1, [pSrcDst], step
        VLD1        dP_0, [pTmp], step
        VLD1        dQ_0, [pSrcDst], step
        VABD        dAp1p0, dP_0, dP_1          ;// |p0 - p1|
        VLD1        dQ_1, [pTmp]
        VABD        dAp0q0, dQ_0, dP_0          ;// |q0 - p0|
        VLD1        dQ_2, [pSrcDst], srcdstStep

        VABD        dAq1q0, dQ_1, dQ_0          ;// |q1 - q0|
        VABD        dAp2p0, dP_2, dP_0          ;// |p2 - p0|
        VCGT        dFilt, dAlpha, dAp0q0       ;// alpha > |q0 - p0|

        TST         bS10, #0xff                 ;// Z set when the low bS byte is 0
        VMAX        dAp1p0, dAq1q0, dAp1p0      ;// max(|q1 - q0|, |p0 - p1|)
        VABD        dAq2q0, dQ_2, dQ_0          ;// |q2 - q0|

        VMOVEQ.U32  dFilt[0], Mask_0            ;// low bS byte 0: clear bytes 0-3
        TST         bS10, #0xff00               ;// Z set when the high bS byte is 0

        VCGT        dAp2p0, dBeta, dAp2p0       ;// beta > |p2 - p0|
        VCGT        dAp1p0, dBeta, dAp1p0       ;// beta > max(|q1 - q0|, |p0 - p1|)

        VMOVEQ.U32  dFilt[1], Mask_0            ;// high bS byte 0: clear bytes 4-7

        VCGT        dAq2q0, dBeta, dAq2q0       ;// beta > |q2 - q0|
        VLD1        dQ_3, [pSrcDst]
        VAND        dFilt, dFilt, dAp1p0
        TST         bS10, #4                    ;// bit 2 of the low bS byte picks the path
                                                ;// for all 8 columns

        ;// dAqflg must be formed first: dApflg is written to D17, which
        ;// still holds dAq2q0 until the first VAND has read it.
        VAND        dAqflg, dFilt, dAq2q0
        VAND        dApflg, dFilt, dAp2p0

        BNE         bSGE4
bSLT4
        ;// bS < 4 Filtering
        ;// pSrcDst is 7 rows past the p3 row here; back 5 rows to the p1 row.
        SUB         pSrcDst, pSrcDst, srcdstStep, LSL #2
        SUB         pSrcDst, pSrcDst, srcdstStep

        ;// NOTE(review): pThresholds is not advanced on this path in this
        ;// file; presumably the helper consumes and advances it - confirm.
        BL          armVCM4P10_DeblockingLumabSLT4_unsafe

        ;// Result Storage
        VST1        dP_1n, [pSrcDst], srcdstStep
        VST1        dP_0n, [pSrcDst], srcdstStep
        SUB         pTmp, pSrcDst, srcdstStep, LSL #2   ;// pTmp = p3 row of this group
        VST1        dQ_0n, [pSrcDst], srcdstStep
        ADDS        XY, XY, XY                  ;// C / Z feed BCC below and BNE at ExitLoopY
        VST1        dQ_1n, [pSrcDst]
        ADD         pSrcDst, pTmp, #8           ;// next 8 columns, same rows

        BCC         LoopX
        B           ExitLoopY

NoFilterBS0
        ;// Both bS bytes are zero: skip these 8 columns.
        ADD         pSrcDst, pSrcDst, #8
        ADDS        XY, XY, XY                  ;// C / Z feed BCC below and BNE at ExitLoopY
        ADD         pThresholds, pThresholds, #2
        BCC         LoopX
        B           ExitLoopY
bSGE4
        ;// bS >= 4 Filtering
        ;// pSrcDst is 7 rows past the p3 row here; back 6 rows to the p2 row.
        SUB         pSrcDst, pSrcDst, srcdstStep, LSL #2
        SUB         pSrcDst, pSrcDst, srcdstStep, LSL #1
        BL          armVCM4P10_DeblockingLumabSGE4_unsafe

        ;// Result Storage
        VST1        dP_2n, [pSrcDst], srcdstStep
        VST1        dP_1n, [pSrcDst], srcdstStep
        VST1        dP_0n, [pSrcDst], srcdstStep
        SUB         pTmp, pSrcDst, srcdstStep, LSL #2   ;// pTmp = p3 row of this group
        VST1        dQ_0n, [pSrcDst], srcdstStep
        ADDS        XY,XY,XY                    ;// C / Z feed BCC below and BNE at ExitLoopY
        VST1        dQ_1n, [pSrcDst], srcdstStep
        ADD         pThresholds, pThresholds, #2
        VST1        dQ_2n, [pSrcDst]

        ADD         pSrcDst, pTmp, #8           ;// next 8 columns, same rows
        BCC         LoopX

ExitLoopY

        ;// None of the four instructions below writes the flags, so BNE
        ;// still tests the Z flag left by the last ADDS XY, XY, XY.
        SUB         pSrcDst, pSrcDst, #16       ;// back to column 0
        VLD1        {dAlpha[]}, [pAlpha_1]      ;// later passes use pAlpha[1]
        ADD         pSrcDst, pSrcDst, srcdstStep, LSL #2    ;// down 4 rows
        VLD1        {dBeta[]}, [pBeta_1]        ;// later passes use pBeta[1]
        BNE         LoopY

        MOV         r0, #OMX_Sts_NoErr

        M_END

        ENDIF




        END
