;//
;// Copyright (C) 2007-2008 ARM Limited
;//
;// Licensed under the Apache License, Version 2.0 (the "License");
;// you may not use this file except in compliance with the License.
;// You may obtain a copy of the License at
;//
;//      http://www.apache.org/licenses/LICENSE-2.0
;//
;// Unless required by applicable law or agreed to in writing, software
;// distributed under the License is distributed on an "AS IS" BASIS,
;// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
;// See the License for the specific language governing permissions and
;// limitations under the License.
;//
;//
;//
;// File Name:  omxVCM4P10_FilterDeblockingChroma_HorEdge_I_s.s
;// OpenMAX DL: v1.0.2
;// Revision:   12290
;// Date:       Wednesday, April 9, 2008
;//
;//
;//
;//

;//----------------------------------------------------------------------
;// omxVCM4P10_FilterDeblockingChroma_HorEdge_I
;//
;// In-place filtering of horizontal edges of a chroma block, NEON
;// implementation (assembled only when the CortexA8 variant is selected).
;//
;// What this file does, as visible in the code below:
;//   * Runs exactly two iterations (see LOOP_COUNT).  Each iteration
;//     handles one horizontal edge, 8 pixels wide (one D register per row);
;//     the second edge lies 4 rows below the first.
;//   * Per edge it reads three rows above (p2, p1, p0) and three rows
;//     below (q0, q1, q2) the edge and writes back only rows p0 and q0.
;//   * Per edge it reads one 32-bit word of boundary strengths (four bS
;//     bytes) and then advances pBS by 8 bytes.  Each bS byte governs
;//     two adjacent pixels.
;//   * If the whole bS word is zero the edge is skipped without stores.
;//   * Always returns OMX_Sts_NoErr in r0; no argument validation is
;//     performed here.
;//
;// Arguments:
;//   r0     pSrcDst      pointer to row q0 of the first edge
;//   r1     srcdstStep   byte stride between rows
;//   r2     pAlpha       alpha thresholds, one byte per edge
;//   r3     pBeta        beta thresholds, one byte per edge
;//   stack  pThresholds  read only by the bS<4 helper (presumably the
;//                       tC0 clipping table - confirm in the helper);
;//                       this file only advances it by 4 on skipped edges
;//   stack  pBS          boundary strengths, see above
;//
;// NOTE(review): the corresponding C prototype is defined by the OpenMAX
;// DL 1.0.2 specification, not by this file - confirm types there.
;//
;// External helpers (defined elsewhere; the interface described here is
;// inferred from how this file consumes their results - TODO confirm):
;//   armVCM4P10_DeblockingChromabSLT4_unsafe  presumably leaves the
;//       bS<4 result in dP_0n / dQ_0n
;//   armVCM4P10_DeblockingChromabSGE4_unsafe  presumably leaves the
;//       bS==4 result in dP_0t / dQ_0t
;// Both are called with BL, so they must preserve every ARM register
;// this loop keeps live (r0-r9) and the NEON values used after the call.
;// On the filtered path this file does not reload dAlpha/dBeta for the
;// second edge; presumably the helpers do - TODO confirm.
;//----------------------------------------------------------------------

        INCLUDE omxtypes_s.h
        INCLUDE armCOMM_s.h

        M_VARIANTS CortexA8

        IF CortexA8

        IMPORT  armVCM4P10_DeblockingChromabSGE4_unsafe
        IMPORT  armVCM4P10_DeblockingChromabSLT4_unsafe

;// Loop counter seed: doubling it with ADDS yields 0x80000000 (Z clear)
;// and then 0 (Z set), i.e. exactly two loop iterations.
LOOP_COUNT      EQU 0x40000000
;// Byte-replicated masks tested against the packed bS word:
;// any of bits 0-1 set in some byte <=> at least one bS in 1..3,
;// bit 2 set in some byte           <=> at least one bS equal to 4.
MASK_3          EQU 0x03030303
MASK_4          EQU 0x04040404

;// Function arguments

pSrcDst         RN 0
srcdstStep      RN 1
pAlpha          RN 2
pBeta           RN 3

pThresholds     RN 5
pBS             RN 4
bS3210          RN 6

;// Loop

XY              RN 7

;// Pixels
dP_0            DN D4.U8
dP_1            DN D5.U8
dP_2            DN D6.U8
dQ_0            DN D8.U8
dQ_1            DN D9.U8
dQ_2            DN D10.U8

;// Filtering Decision
dAlpha          DN D0.U8
dBeta           DN D2.U8

;// NOTE: several aliases below deliberately share a physical register
;// (D12: dAqflg/dAp1p0, D13: dAp0q0/dP_0t, D17: dApflg/dAq2q0,
;//  D19: dAp2p0/dTemp, D26: dBS3210/dFilt_bs).  The instruction order in
;// the loop relies on each value being dead before its register is reused.
dFilt           DN D16.U8
dAqflg          DN D12.U8
dApflg          DN D17.U8

dAp0q0          DN D13.U8
dAp1p0          DN D12.U8
dAq1q0          DN D18.U8
dAp2p0          DN D19.U8
dAq2q0          DN D17.U8

qBS3210         QN Q13.U16
dBS3210         DN D26
dMask_bs        DN D27
dFilt_bs        DN D26.U16

;// bSLT4
dMask_0         DN D14.U8
dMask_1         DN D15.U8
dMask_4         DN D1.U16

Mask_4          RN 8
Mask_3          RN 9

dTemp           DN D19.U8

;// Result
dP_0t           DN D13.U8
dQ_0t           DN D31.U8

dP_0n           DN D29.U8
dQ_0n           DN D24.U8


        ;// Function header
        ;// r9 / d15 are the register-save limits handed to M_START
        ;// (macro from armCOMM_s.h; presumably saves r4-r9 and d8-d15
        ;// plus the return address - confirm against the macro).
        M_START omxVCM4P10_FilterDeblockingChroma_HorEdge_I, r9, d15

        ;//Arguments on the stack
        M_ARG   ppThresholds, 4
        M_ARG   ppBS, 4

        ;// d0-dAlpha_0
        ;// d2-dBeta_0

        ;// Broadcast alpha[0] / beta[0] to all byte lanes.  The write-back
        ;// (!) advances each pointer by one byte so that the later reload
        ;// in NoFilterBS0 picks up the value for the second edge.
        ;// The two SUBs together move pSrcDst back three rows, to row p2.
        VLD1        {dAlpha[]}, [pAlpha]!
        SUB         pSrcDst, pSrcDst, srcdstStep, LSL #1
        SUB         pSrcDst, pSrcDst, srcdstStep
        VLD1        {dBeta[]}, [pBeta]!

        M_LDR       pBS, ppBS
        M_LDR       pThresholds, ppThresholds

        LDR         Mask_3, =MASK_3
        LDR         Mask_4, =MASK_4

        ;// dMask_0 / dMask_1 are not read in this file; presumably they
        ;// are constants consumed by the bS<4 helper - TODO confirm.
        VMOV        dMask_0, #0
        VMOV        dMask_1, #1
        VMOV        dMask_4, #4             ;// 16-bit lanes of 4, for the bS==4 test

        LDR         XY, =LOOP_COUNT

        ;// p0-p3 - d4-d7
        ;// q0-q3 - d8-d11
        ;// (only p0-p2 in d4-d6 and q0-q2 in d8-d10 are loaded here)
        ;//
        ;// Loop invariant on entry: pSrcDst points at row p2 of the
        ;// current edge, i.e. three rows above the edge.
LoopY
        LDR         bS3210, [pBS], #8       ;// four bS bytes; skip to next edge's bS

        VLD1        dP_2, [pSrcDst], srcdstStep
        ;1  (original author's marker; meaning not documented)
        VLD1        dP_1, [pSrcDst], srcdstStep
        CMP         bS3210, #0              ;// flags consumed by BEQ below; the
                                            ;// NEON ops in between leave them intact
        VLD1        dP_0, [pSrcDst], srcdstStep
        ;1  (original author's marker; meaning not documented)
        VLD1        dQ_0, [pSrcDst], srcdstStep
        VABD        dAp2p0, dP_2, dP_0      ;// |p2 - p0|
        VLD1        dQ_1, [pSrcDst], srcdstStep
        VABD        dAp0q0, dP_0, dQ_0      ;// |p0 - q0|
        VLD1        dQ_2, [pSrcDst], srcdstStep
        ;// pSrcDst is now three rows below the edge.
        BEQ         NoFilterBS0             ;// all four bS are zero: skip this edge

        VABD        dAp1p0, dP_1, dP_0      ;// |p1 - p0|
        VABD        dAq1q0, dQ_1, dQ_0      ;// |q1 - q0|

        VCGT        dFilt, dAlpha, dAp0q0   ;// alpha > |p0 - q0|
        VMOV.U32    dBS3210[0], bS3210
        VMAX        dAp1p0, dAq1q0, dAp1p0  ;// max(|q1 - q0|, |p1 - p0|)
        ;// Widen the bS bytes to 16-bit lanes so that each bS value lines
        ;// up with two adjacent pixel bytes.
        VMOVL       qBS3210, dBS3210.U8
        VABD        dAq2q0, dQ_2, dQ_0      ;// |q2 - q0|
        VCGT        dMask_bs.S16, dBS3210.S16, #0   ;// per-pixel-pair mask: bS > 0

        VCGT        dAp1p0, dBeta, dAp1p0   ;// beta > max(|q1 - q0|, |p1 - p0|)
        VCGT        dAp2p0, dBeta, dAp2p0   ;// beta > |p2 - p0|

        VAND        dFilt, dMask_bs.U8

        TST         bS3210, Mask_3          ;// any bS in 1..3?  (flags used by BLNE)

        VCGT        dAq2q0, dBeta, dAq2q0   ;// beta > |q2 - q0|
        VAND        dFilt, dFilt, dAp1p0    ;// dFilt = pixels that are filtered at all

        ;// dAqflg / dApflg are not read in this file; presumably inputs
        ;// to the helpers - TODO confirm.
        VAND        dAqflg, dFilt, dAq2q0
        VAND        dApflg, dFilt, dAp2p0

        ;// bS < 4 Filtering
        BLNE        armVCM4P10_DeblockingChromabSLT4_unsafe

        TST         bS3210, Mask_4          ;// any bS == 4?  (flags used by BLNE)

        ;// Step back four rows: pSrcDst now points at row p0.
        SUB         pSrcDst, pSrcDst, srcdstStep, LSL #2
        VTST        dFilt_bs, dFilt_bs, dMask_4     ;// per-pixel-pair mask: bS has bit 2 set

        ;// bS == 4 Filtering
        BLNE        armVCM4P10_DeblockingChromabSGE4_unsafe

        ;// Merge: take the bS==4 result where dFilt_bs is set ...
        VBIT        dP_0n, dP_0t, dFilt_bs
        VBIT        dQ_0n, dQ_0t, dFilt_bs

        ;// ... and restore the original pixel wherever dFilt is clear.
        ;// Every lane is therefore well defined even if one of the two
        ;// helpers was not called.
        VBIF        dP_0n, dP_0, dFilt
        VBIF        dQ_0n, dQ_0, dFilt

        ;// Result Storage
        ;// After both stores pSrcDst is one row below the edge, which is
        ;// row p2 of the next edge (4 rows further down).
        VST1        dP_0n, [pSrcDst], srcdstStep
        ADDS        XY, XY, XY              ;// Z set after the second iteration
        VST1        dQ_0n, [pSrcDst], srcdstStep

        BNE         LoopY

        MOV         r0, #OMX_Sts_NoErr

        M_EXIT

        ;// Skipped edge (all bS == 0): nothing is stored.  Re-establish the
        ;// loop invariant by hand: rewind pSrcDst from three rows below the
        ;// edge to one row below it, reload alpha/beta through the already
        ;// advanced pointers, and step pThresholds past this edge's four
        ;// entries.
NoFilterBS0

        VLD1        {dAlpha[]}, [pAlpha]
        SUB         pSrcDst, pSrcDst, srcdstStep, LSL #1
        ADDS        XY, XY, XY              ;// Z set after the second iteration
        VLD1        {dBeta[]}, [pBeta]
        ADD         pThresholds, pThresholds, #4
        BNE         LoopY

        MOV         r0, #OMX_Sts_NoErr
        M_END

        ENDIF


        END