;//
;// Copyright (C) 2007-2008 ARM Limited
;//
;// Licensed under the Apache License, Version 2.0 (the "License");
;// you may not use this file except in compliance with the License.
;// You may obtain a copy of the License at
;//
;//      http://www.apache.org/licenses/LICENSE-2.0
;//
;// Unless required by applicable law or agreed to in writing, software
;// distributed under the License is distributed on an "AS IS" BASIS,
;// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
;// See the License for the specific language governing permissions and
;// limitations under the License.
;//
;//
;//
;// File Name:  omxVCM4P10_FilterDeblockingChroma_VerEdge_I_s.s
;// OpenMAX DL: v1.0.2
;// Revision:   12290
;// Date:       Wednesday, April 9, 2008
;//
;//
;//
;//

;// -------------------------------------------------------------------------
;// omxVCM4P10_FilterDeblockingChroma_VerEdge_I
;//
;// Syntax : ARM armasm (RVCT); ';' starts a comment.
;// Target : built only for the CortexA8 variant (uses NEON).
;//
;// Overview (as implemented below):
;//   Runs two passes.  Each pass
;//     1. reads 4 bS bytes from pBS (then advances pBS by 8),
;//     2. loads 8 rows of 8 bytes starting 4 bytes to the left of the
;//        current pSrcDst column and rearranges them with VZIP so that
;//        each pixel column (p2, p1, p0, q0, q1, q2) sits in one D register,
;//     3. builds the per-lane filter masks from dAlpha, dBeta and bS,
;//     4. conditionally calls the two imported helpers,
;//     5. writes back only the p0 column (byte offset +3 in each row) and
;//        the q0 column (byte offset +4 in each row),
;//     6. moves pSrcDst 4 bytes to the right for the next pass.
;//   If all 4 bS bytes are zero the pass skips filtering (NoFilterBS0).
;//
;// In  : r0 = pSrcDst, r1 = srcdstStep, r2 = pAlpha, r3 = pBeta,
;//       stack = ppThresholds, ppBS (declared through M_ARG).
;// Out : r0 = OMX_Sts_NoErr
;//
;// NOTE(review): register save/restore is done by the M_START / M_EXIT /
;//   M_END macros from armCOMM_s.h; "r12, d15" presumably names the highest
;//   core and NEON registers to preserve -- confirm against the macro file.
;// NOTE(review): the C prototype is presumably the OpenMAX DL 1.0.2 one,
;//   (pSrcDst, srcdstStep, pAlpha, pBeta, pThresholds, pBS) -- confirm
;//   against the specification.
;// -------------------------------------------------------------------------

        INCLUDE omxtypes_s.h
        INCLUDE armCOMM_s.h

        M_VARIANTS CortexA8

        IF CortexA8

        IMPORT  armVCM4P10_DeblockingChromabSGE4_unsafe
        IMPORT  armVCM4P10_DeblockingChromabSLT4_unsafe

;// LOOP_COUNT is doubled with ADDS once per pass:
;//   0x40000000 -> 0x80000000 (Z clear, loop again) -> 0 (Z set, exit),
;// i.e. exactly two passes.
LOOP_COUNT  EQU 0x40000000
;// Byte-replicated bit masks tested against the four packed bS bytes.
MASK_3      EQU 0x03030303
MASK_4      EQU 0x04040404

;// Function arguments

pSrcDst     RN 0
srcdstStep  RN 1
pAlpha      RN 2
pBeta       RN 3

pThresholds RN 5
pBS         RN 4
bS3210      RN 6
pSrcDst_P   RN 10
pSrcDst_Q   RN 12

;// pTmp / pTmp2 share r10 / r12 with pSrcDst_P / pSrcDst_Q.
pTmp        RN 10
pTmp2       RN 12
;// step lives in r14 (LR): every BL overwrites it, so it is recomputed
;// after the helper calls.
step        RN 14

;// Loop

XY          RN 7

;// Rows input
dRow0       DN D7.U8
dRow1       DN D8.U8
dRow2       DN D5.U8
dRow3       DN D10.U8
dRow4       DN D6.U8
dRow5       DN D9.U8
dRow6       DN D4.U8
dRow7       DN D11.U8


;// Pixels (same D registers as the rows above, named for their content
;// once the VZIP sequence has completed)
dP_0        DN D4.U8
dP_1        DN D5.U8
dP_2        DN D6.U8
dQ_0        DN D8.U8
dQ_1        DN D9.U8
dQ_2        DN D10.U8

;// Filtering Decision
dAlpha      DN D0.U8
dBeta       DN D2.U8

dFilt       DN D16.U8
dAqflg      DN D12.U8
dApflg      DN D17.U8

;// Several of these overlap other aliases:
;//   dAp1p0 / dAqflg = D12,  dAq2q0 / dApflg = D17,
;//   dAp0q0 / dP_0t  = D13,  dAp2p0 / dTemp  = D19.
dAp0q0      DN D13.U8
dAp1p0      DN D12.U8
dAq1q0      DN D18.U8
dAp2p0      DN D19.U8
dAq2q0      DN D17.U8

;// qBS3210 (Q13) is the D26:D27 pair, so dBS3210 / dFilt_bs are its low
;// half and dMask_bs is its high half.
qBS3210     QN Q13.U16
dBS3210     DN D26
dMask_bs    DN D27
dFilt_bs    DN D26.U16

;// bSLT4
dMask_0     DN D14.U8
dMask_1     DN D15.U8
dMask_4     DN D1.U16

Mask_4      RN 8
Mask_3      RN 9

dTemp       DN D19.U8

;// Result
dP_0t       DN D13.U8
dQ_0t       DN D31.U8

dP_0n       DN D29.U8
dQ_0n       DN D24.U8



        ;// Function header
        M_START omxVCM4P10_FilterDeblockingChroma_VerEdge_I, r12, d15

        ;//Arguments on the stack
        M_ARG   ppThresholds, 4
        M_ARG   ppBS, 4

        ;// d0-dAlpha_0
        ;// d2-dBeta_0

        ;// Load the first alpha/beta byte into every lane.  The writeback
        ;// leaves pAlpha/pBeta on the next byte, which NoFilterBS0 reloads
        ;// from before the second pass.
        VLD1        {dAlpha[]}, [pAlpha]!
        SUB         pSrcDst, pSrcDst, #4        ;// start 4 bytes left of the edge
        VLD1        {dBeta[]}, [pBeta]!

        M_LDR       pBS, ppBS
        M_LDR       pThresholds, ppThresholds

        LDR         Mask_4, =MASK_4
        LDR         Mask_3, =MASK_3

        ;// dMask_0 - d14
        ;// dMask_1 - d15
        ;// dMask_4 - d1 (16-bit lanes)
        ;// dMask_0 and dMask_1 are not read in this file; presumably the
        ;// imported helpers consume them -- confirm.

        VMOV        dMask_0, #0
        VMOV        dMask_1, #1
        VMOV        dMask_4, #4

        LDR         XY, =LOOP_COUNT

        ;// p0-p3 - d4-d7
        ;// q0-q3 - d8-d11


LoopY
        ;// Fetch the 4 packed bS bytes for this edge; the next pass reads
        ;// 8 bytes further on.
        LDR         bS3210, [pBS], #8
        ADD         pTmp, pSrcDst, srcdstStep   ;// odd rows
        ADD         step, srcdstStep, srcdstStep ;// two rows per pointer

        ;// Even rows through pSrcDst, odd rows through pTmp.  After the 8
        ;// loads pSrcDst has advanced by 8 * srcdstStep.
        VLD1        dRow0, [pSrcDst], step
        VLD1        dRow1, [pTmp], step
        VLD1        dRow2, [pSrcDst], step
        VLD1        dRow3, [pTmp], step
        VLD1        dRow4, [pSrcDst], step
        VLD1        dRow5, [pTmp], step
        VLD1        dRow6, [pSrcDst], step
        VLD1        dRow7, [pTmp], step


        ;// dRow0 = [q3r0 q2r0 q1r0 q0r0 p0r0 p1r0 p2r0 p3r0]
        ;// dRow1 = [q3r1 q2r1 q1r1 q0r1 p0r1 p1r1 p2r1 p3r1]
        ;// dRow2 = [q3r2 q2r2 q1r2 q0r2 p0r2 p1r2 p2r2 p3r2]
        ;// dRow3 = [q3r3 q2r3 q1r3 q0r3 p0r3 p1r3 p2r3 p3r3]
        ;// dRow4 = [q3r4 q2r4 q1r4 q0r4 p0r4 p1r4 p2r4 p3r4]
        ;// dRow5 = [q3r5 q2r5 q1r5 q0r5 p0r5 p1r5 p2r5 p3r5]
        ;// dRow6 = [q3r6 q2r6 q1r6 q0r6 p0r6 p1r6 p2r6 p3r6]
        ;// dRow7 = [q3r7 q2r7 q1r7 q0r7 p0r7 p1r7 p2r7 p3r7]

        ;// 8x8 Transpose
        VZIP.8      dRow0, dRow1
        VZIP.8      dRow2, dRow3
        VZIP.8      dRow4, dRow5
        VZIP.8      dRow6, dRow7

        VZIP.16     dRow0, dRow2
        VZIP.16     dRow1, dRow3
        VZIP.16     dRow4, dRow6
        VZIP.16     dRow5, dRow7

        VZIP.32     dRow0, dRow4
        VZIP.32     dRow2, dRow6
        VZIP.32     dRow3, dRow7
        VZIP.32     dRow1, dRow5


        ;// Skip filtering when all four bS bytes are zero.  The NEON
        ;// instructions between CMP and BEQ do not alter the flags.
        CMP         bS3210, #0
        VABD        dAp2p0, dP_2, dP_0
        VABD        dAp0q0, dP_0, dQ_0
        BEQ         NoFilterBS0

        VABD        dAp1p0, dP_1, dP_0
        VABD        dAq1q0, dQ_1, dQ_0

        ;// Widen the 4 bS bytes to 16-bit lanes so that each bS value
        ;// covers two adjacent byte lanes (two rows).
        VMOV.U32    dBS3210[0], bS3210
        VCGT        dFilt, dAlpha, dAp0q0       ;// |p0-q0| < alpha
        VMAX        dAp1p0, dAq1q0, dAp1p0      ;// max(|p1-p0|, |q1-q0|)
        VMOVL       qBS3210, dBS3210.U8
        VABD        dAq2q0, dQ_2, dQ_0
        VCGT        dMask_bs.S16, dBS3210.S16, #0 ;// lanes with bS > 0

        VCGT        dAp1p0, dBeta, dAp1p0       ;// both differences < beta
        VCGT        dAp2p0, dBeta, dAp2p0
        VAND        dFilt, dMask_bs.U8

        ;// Z clear if any bS byte has bit 0 or bit 1 set; consumed by the
        ;// first BLNE below.
        TST         bS3210, Mask_3

        VCGT        dAq2q0, dBeta, dAq2q0
        VAND        dFilt, dFilt, dAp1p0

        VAND        dAqflg, dFilt, dAq2q0
        VAND        dApflg, dFilt, dAp2p0

        ;// bS < 4 Filtering
        BLNE        armVCM4P10_DeblockingChromabSLT4_unsafe

        ;// Z clear if any bS byte has bit 2 set; consumed by the second
        ;// BLNE below (SUB without S and VTST leave the flags alone).
        TST         bS3210, Mask_4

        ;// Realign the pointer: undo the 8-row advance made by the loads.
        SUB         pSrcDst, pSrcDst, srcdstStep, LSL #3
        VTST        dFilt_bs, dFilt_bs, dMask_4 ;// lanes whose bS has bit 2 set

        ;// bS == 4 Filtering
        BLNE        armVCM4P10_DeblockingChromabSGE4_unsafe

        ;// NOTE(review): this path neither reloads dAlpha/dBeta nor advances
        ;// pThresholds, whereas NoFilterBS0 does both.  Presumably the
        ;// imported helpers take care of it -- confirm against
        ;// the helper sources.

        ;// Where dFilt_bs is set take the *_0t values over the *_0n values.
        VBIT        dP_0n, dP_0t, dFilt_bs
        VBIT        dQ_0n, dQ_0t, dFilt_bs

        ;// Result Storage
        ADD         pSrcDst_P, pSrcDst, #3      ;// p0 column
        VBIF        dP_0n, dP_0, dFilt          ;// keep original where not filtered

        ADD         pTmp2, pSrcDst_P, srcdstStep
        ADD         step, srcdstStep, srcdstStep ;// r14 was overwritten by BL
        VBIF        dQ_0n, dQ_0, dFilt

        ;// Advance the pass counter now; none of the following instructions
        ;// change the flags, so BNE LoopY below still sees this result.
        ADDS        XY, XY, XY

        VST1        {dP_0n[0]}, [pSrcDst_P], step
        VST1        {dP_0n[1]}, [pTmp2], step
        VST1        {dP_0n[2]}, [pSrcDst_P], step
        VST1        {dP_0n[3]}, [pTmp2], step
        VST1        {dP_0n[4]}, [pSrcDst_P], step
        VST1        {dP_0n[5]}, [pTmp2], step
        VST1        {dP_0n[6]}, [pSrcDst_P], step
        VST1        {dP_0n[7]}, [pTmp2], step

        ADD         pSrcDst_Q, pSrcDst, #4      ;// q0 column
        ADD         pTmp, pSrcDst_Q, srcdstStep

        VST1        {dQ_0n[0]}, [pSrcDst_Q], step
        VST1        {dQ_0n[1]}, [pTmp], step
        VST1        {dQ_0n[2]}, [pSrcDst_Q], step
        VST1        {dQ_0n[3]}, [pTmp], step
        VST1        {dQ_0n[4]}, [pSrcDst_Q], step
        VST1        {dQ_0n[5]}, [pTmp], step
        VST1        {dQ_0n[6]}, [pSrcDst_Q], step
        VST1        {dQ_0n[7]}, [pTmp], step

        ADD         pSrcDst, pSrcDst, #4        ;// next edge, 4 bytes right

        BNE         LoopY

        MOV         r0, #OMX_Sts_NoErr

        M_EXIT

        ;// All four bS bytes were zero: no pixel is written.  Bring the
        ;// loop state to where the filtered path would leave it.
NoFilterBS0
        VLD1        {dAlpha[]}, [pAlpha]        ;// reload from the advanced pointer
        ADD         pSrcDst, pSrcDst, #4        ;// next edge, 4 bytes right
        SUB         pSrcDst, pSrcDst, srcdstStep, LSL #3 ;// undo the 8-row advance
        ADDS        XY, XY, XY                  ;// pass counter; Z set after pass 2
        VLD1        {dBeta[]}, [pBeta]
        ADD         pThresholds, pThresholds, #4
        BNE         LoopY

        MOV         r0, #OMX_Sts_NoErr

        M_END

        ENDIF


        END

