omxVCM4P10_FilterDeblockingLuma_HorEdge_I_s.s revision 0c1bc742181ded4930842b46e9507372f0b1b963
;//
;//
;// File Name:  omxVCM4P10_FilterDeblockingLuma_HorEdge_I_s.s
;// OpenMAX DL: v1.0.2
;// Revision:   12290
;// Date:       Wednesday, April 9, 2008
;//
;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
;//
;//
;//

        INCLUDE omxtypes_s.h
        INCLUDE armCOMM_s.h

        M_VARIANTS CortexA8

        IMPORT  armVCM4P10_DeblockingLumabSLT4_unsafe
        IMPORT  armVCM4P10_DeblockingLumabSGE4_unsafe

        IF CortexA8

;//--------------------------------------------------------------------
;// omxVCM4P10_FilterDeblockingLuma_HorEdge_I   (CortexA8 / NEON build)
;//
;// In:     r0     pSrcDst       row directly below the first edge (q0 row)
;//         r1     srcdstStep    distance in bytes between rows
;//         r2     pAlpha        byte 0 -> first edge, byte 1 -> later edges
;//         r3     pBeta         byte 0 -> first edge, byte 1 -> later edges
;//         stack  pThresholds, pBS   (two 4-byte slots, in that order)
;// Out:    r0 = OMX_Sts_NoErr
;//
;// Shape of the work:
;//   * outer loop: 4 passes, pSrcDst moves down 4 rows per pass;
;//   * inner loop: 2 passes per outer pass, 8 bytes wide each;
;//   * per 8-byte half: one halfword is read from pBS (two bS bytes, one
;//     per 4 pixels).  A zero halfword skips the half.  Otherwise rows
;//     p3,p2,p1,p0,q0,q1,q2,q3 are loaded, the filter masks are built
;//     and one of the two imported helpers is called; the rows it
;//     returns are then written back.
;//
;// Loop control: a single register starts at LOOP_COUNT and is doubled
;// (ADDS x,x,x) once per half.  Carry-out ends the inner loop (every
;// second doubling); a zero result ends the outer loop (eighth doubling).
;// Nothing between that ADDS and the closing BCC/BNE writes the flags.
;//--------------------------------------------------------------------

LOOP_COUNT  EQU 0x55000000


;// ---- Core registers -------------------------------------------------

pSrcDst     RN 0
srcdstStep  RN 1
pAlpha      RN 2            ;// alpha for the first edge
pBeta       RN 3            ;// beta  for the first edge
pBS         RN 4
pThresholds RN 5
pRow        RN 6            ;// second row cursor / saved p3-row address
pAlphaNext  RN 7            ;// pAlpha + 1
pBetaNext   RN 8            ;// pBeta  + 1
loopBits    RN 9            ;// shift-register loop counter
step2       RN 10           ;// 2 * srcdstStep
zeroWord    RN 11           ;// constant 0, source for clearing dFilt lanes
bSPair      RN 12           ;// two bS bytes for the current 8-pixel half


;// ---- NEON registers referenced by this file -------------------------

dAlpha      DN D0.U8
dBeta       DN D2.U8

;// Pixel rows
dP_0        DN D4.U8
dP_1        DN D5.U8
dP_2        DN D6.U8
dP_3        DN D7.U8
dQ_0        DN D8.U8
dQ_1        DN D9.U8
dQ_2        DN D10.U8
dQ_3        DN D11.U8

;// Filtering decision.  D12 and D17 each carry two names: the absolute
;// difference is consumed before the flag is written to the same register.
dAp1p0      DN D12.U8
dAqflg      DN D12.U8
dAp0q0      DN D13.U8
dMask_0     DN D14.U8
dMask_1     DN D15.U8
dFilt       DN D16.U8
dAq2q0      DN D17.U8
dApflg      DN D17.U8
dAq1q0      DN D18.U8
dAp2p0      DN D19.U8

;// Rows written back after a helper call
dQ_0n       DN D24.U8
dQ_1n       DN D25.U8
dQ_2n       DN D28.U8
dP_0n       DN D29.U8
dP_1n       DN D30.U8
dP_2n       DN D31.U8


;// ---- Aliases not referenced by any instruction in this file ---------
;// NOTE(review): these look like the scratch allocation of the two
;// imported helpers; kept unchanged as a register map - confirm against
;// the helper source before relying on them.

;// bSLT4
dTC0        DN D18.U8
dTC1        DN D19.U8
dTC01       DN D18.U8

dTCs        DN D31.S8
dTC         DN D31.U8

dTemp       DN D19.U8

;// Computing P0,Q0
qDq0p0      QN Q10.S16
qDp1q1      QN Q11.S16
qDelta      QN Q10.S16      ;// same register as qDq0p0
dDelta      DN D20.S8

;// Computing P1,Q1
dRp0q0      DN D24.U8

dMaxP       DN D23.U8
dMinP       DN D22.U8

dMaxQ       DN D19.U8
dMinQ       DN D21.U8

dDeltaP     DN D26.U8
dDeltaQ     DN D27.U8

qP_0n       QN Q14.S16
qQ_0n       QN Q12.S16

;// bSGE4
qSp0q0      QN Q10.U16

qSp2q1      QN Q11.U16
qSp0q0p1    QN Q12.U16
qSp3p2      QN Q13.U16
dHSp0q1     DN D28.U8

qSq2p1      QN Q11.U16
qSp0q0q1    QN Q12.U16
qSq3q2      QN Q13.U16
dHSq0p1     DN D28.U8

qTemp1      QN Q11.U16      ;// same register as qSp2q1
qTemp2      QN Q12.U16      ;// same register as qSp0q0p1

dP_0t       DN D28.U8       ;// same register as dHSp0q1
dQ_0t       DN D22.U8


        ;// ---- Entry ----------------------------------------------------
        ;// r11 / d15 are handed to M_START; presumably the top of the
        ;// core / NEON register ranges it preserves - see armCOMM_s.h.
        M_START omxVCM4P10_FilterDeblockingLuma_HorEdge_I, r11, d15

        ;// Stack-passed arguments
        M_ARG   ppThresholds, 4
        M_ARG   ppBS, 4

        ;// Remember where the second alpha/beta byte lives
        ADD         pAlphaNext, pAlpha, #1
        ADD         pBetaNext, pBeta, #1

        ;// Broadcast the first-edge alpha/beta and back up to the p3 row
        VLD1        {dAlpha[]}, [pAlpha]
        SUB         pSrcDst, pSrcDst, srcdstStep, LSL #2
        VLD1        {dBeta[]}, [pBeta]

        M_LDR       pBS, ppBS
        M_LDR       pThresholds, ppThresholds

        MOV         zeroWord,#0

        ;// D14 = all 0, D15 = all 1.  Neither is read again in this file;
        ;// presumably they are inputs to the helpers - TODO confirm.
        VMOV        dMask_0, #0
        VMOV        dMask_1, #1

        ;// The eight rows are fetched through two interleaved cursors,
        ;// each stepping two rows at a time.
        ADD         step2, srcdstStep, srcdstStep

        LDR         loopBits,=LOOP_COUNT

        ;// ---- Main loops -----------------------------------------------
        ;// On entry to each half pSrcDst addresses the p3 row.
EdgeLoop
HalfLoop
        LDRH        bSPair, [pBS], #2
        ADD         pRow, pSrcDst, srcdstStep
        CMP         bSPair, #0
        BEQ         SkipBS0                 ;// both bS bytes zero

        ;// Even rows through pSrcDst, odd rows through pRow; the
        ;// absolute differences are started as soon as operands arrive.
        VLD1        dP_3, [pSrcDst], step2
        VLD1        dP_2, [pRow], step2
        VLD1        dP_1, [pSrcDst], step2
        VLD1        dP_0, [pRow], step2
        VLD1        dQ_0, [pSrcDst], step2
        VABD        dAp1p0, dP_0, dP_1
        VLD1        dQ_1, [pRow]
        VABD        dAp0q0, dQ_0, dP_0
        VLD1        dQ_2, [pSrcDst], srcdstStep

        VABD        dAq1q0, dQ_1, dQ_0
        VABD        dAp2p0, dP_2, dP_0
        VCGT        dFilt, dAlpha, dAp0q0   ;// alpha > |q0 - p0|

        TST         bSPair, #0xff           ;// low bS byte: pixels 0..3
        VMAX        dAp1p0, dAq1q0, dAp1p0  ;// max(|p1-p0|, |q1-q0|)
        VABD        dAq2q0, dQ_2, dQ_0

        VMOVEQ.U32  dFilt[0], zeroWord      ;// bS == 0 -> no filtering there
        TST         bSPair, #0xff00         ;// high bS byte: pixels 4..7

        VCGT        dAp2p0, dBeta, dAp2p0   ;// beta > |p2 - p0|
        VCGT        dAp1p0, dBeta, dAp1p0   ;// beta > max(|p1-p0|, |q1-q0|)

        VMOVEQ.U32  dFilt[1], zeroWord

        VCGT        dAq2q0, dBeta, dAq2q0   ;// beta > |q2 - q0|
        VLD1        dQ_3, [pSrcDst]         ;// pSrcDst is left on the q3 row
        VAND        dFilt, dFilt, dAp1p0
        TST         bSPair, #4              ;// bit 2 of the low bS byte only

        ;// dAqflg must be formed first: dApflg overwrites D17 (dAq2q0).
        VAND        dAqflg, dFilt, dAq2q0
        VAND        dApflg, dFilt, dAp2p0

        BNE         FilterBSGE4

FilterBSLT4
        ;// bS < 4: rewind from the q3 row to the p1 row (5 rows up).
        SUB         pSrcDst, pSrcDst, srcdstStep, LSL #2
        SUB         pSrcDst, pSrcDst, srcdstStep

        ;// The stores below assume the helper leaves pSrcDst intact and
        ;// returns its results in dP_1n, dP_0n, dQ_0n, dQ_1n.
        ;// pThresholds is not stepped on this path, unlike the other
        ;// two; presumably the helper advances it - TODO confirm.
        BL          armVCM4P10_DeblockingLumabSLT4_unsafe

        ;// Write back p1, p0, q0, q1
        VST1        dP_1n, [pSrcDst], srcdstStep
        VST1        dP_0n, [pSrcDst], srcdstStep
        SUB         pRow, pSrcDst, srcdstStep, LSL #2   ;// q0 row - 4 rows = p3 row
        VST1        dQ_0n, [pSrcDst], srcdstStep
        ADDS        loopBits, loopBits, loopBits        ;// C: edge done, Z: all done
        VST1        dQ_1n, [pSrcDst]
        ADD         pSrcDst, pRow, #8                   ;// p3 row of the next half

        BCC         HalfLoop
        B           NextEdge

SkipBS0
        ;// Nothing to filter: step over the 8 pixels and 2 thresholds.
        ADD         pSrcDst, pSrcDst, #8
        ADDS        loopBits, loopBits, loopBits
        ADD         pThresholds, pThresholds, #2
        BCC         HalfLoop
        B           NextEdge

FilterBSGE4
        ;// bS >= 4: rewind from the q3 row to the p2 row (6 rows up).
        SUB         pSrcDst, pSrcDst, srcdstStep, LSL #2
        SUB         pSrcDst, pSrcDst, srcdstStep, LSL #1
        BL          armVCM4P10_DeblockingLumabSGE4_unsafe

        ;// Write back p2, p1, p0, q0, q1, q2
        VST1        dP_2n, [pSrcDst], srcdstStep
        VST1        dP_1n, [pSrcDst], srcdstStep
        VST1        dP_0n, [pSrcDst], srcdstStep
        SUB         pRow, pSrcDst, srcdstStep, LSL #2   ;// q0 row - 4 rows = p3 row
        VST1        dQ_0n, [pSrcDst], srcdstStep
        ADDS        loopBits,loopBits,loopBits
        VST1        dQ_1n, [pSrcDst], srcdstStep
        ADD         pThresholds, pThresholds, #2
        VST1        dQ_2n, [pSrcDst]

        ADD         pSrcDst, pRow, #8
        BCC         HalfLoop

NextEdge
        ;// Back to column 0, down four rows; every edge after the first
        ;// uses the second alpha/beta byte.  Z still comes from the last
        ;// ADDS on loopBits.
        SUB         pSrcDst, pSrcDst, #16
        VLD1        {dAlpha[]}, [pAlphaNext]
        ADD         pSrcDst, pSrcDst, srcdstStep, LSL #2
        VLD1        {dBeta[]}, [pBetaNext]
        BNE         EdgeLoop

        MOV         r0, #OMX_Sts_NoErr

        M_END

    ENDIF


        END