;//
;//
;// File Name:  omxVCM4P10_FilterDeblockingChroma_HorEdge_I_s.s
;// OpenMAX DL: v1.0.2
;// Revision:   12290
;// Date:       Wednesday, April 9, 2008
;//
;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
;//
;//
;//
;// Overview
;// --------
;// In-place filter over two horizontal edges. For each edge the routine
;// loads six rows of 8 bytes (P2, P1, P0 above the edge; Q0, Q1, Q2 below),
;// builds per-byte filter masks from alpha, beta and the boundary-strength
;// word, lets the two imported helper routines do the arithmetic, and
;// writes back the P0 and Q0 rows only.
;//
;// Inputs (register / stack):
;//   r0      pSrcDst       - first Q0 row of the first edge
;//   r1      srcdstStep    - byte distance between consecutive rows
;//   r2      pAlpha        - alpha values, one byte per edge
;//   r3      pBeta         - beta values, one byte per edge
;//   stack   ppThresholds  - threshold pointer (not dereferenced in this file)
;//   stack   ppBS          - boundary strengths; one 32-bit word is read per
;//                           edge and the pointer then moves on by 8 bytes
;// Output:
;//   r0 = OMX_Sts_NoErr
;//

        INCLUDE omxtypes_s.h
        INCLUDE armCOMM_s.h

        M_VARIANTS CortexA8

        IF CortexA8

        IMPORT  armVCM4P10_DeblockingChromabSGE4_unsafe
        IMPORT  armVCM4P10_DeblockingChromabSLT4_unsafe

;// Doubled with ADDS once per edge: 0x40000000 -> 0x80000000 -> 0,
;// so the Z flag ends the loop after exactly two passes.
LOOP_COUNT  EQU 0x40000000
;// Low two bits of every bS byte (some bS is 1, 2 or 3 when TST is non-zero)
MASK_3      EQU 0x03030303
;// Bit 2 of every bS byte (some bS has the value-4 bit set when TST is non-zero)
MASK_4      EQU 0x04040404

;// ---------------------------------------------------------------------
;// Core register roles
;// ---------------------------------------------------------------------

;// Arguments passed in registers
pSrcDst     RN 0
srcdstStep  RN 1
pAlpha      RN 2
pBeta       RN 3

;// Arguments fetched from the stack
pBS         RN 4
pThresholds RN 5

;// Packed boundary strengths of the current edge (four bytes)
bsWord      RN 6

;// Shift-register style pass counter (see LOOP_COUNT)
passBits    RN 7

;// MASK_4 / MASK_3 held in registers for the TST instructions
rBsBit2     RN 8
rBsLow2     RN 9

;// ---------------------------------------------------------------------
;// NEON register roles
;// Several aliases deliberately share one physical register; the shared
;// pairs are noted so that the instruction order below is not disturbed.
;// ---------------------------------------------------------------------

;// Pixel rows around the edge
dRowP0      DN D4.U8
dRowP1      DN D5.U8
dRowP2      DN D6.U8
dRowQ0      DN D8.U8
dRowQ1      DN D9.U8
dRowQ2      DN D10.U8

;// alpha / beta replicated into every byte lane
dAlphaDup   DN D0.U8
dBetaDup    DN D2.U8

;// Filter decision masks
dFiltMask   DN D16.U8
dAqFlag     DN D12.U8                   ;// same register as dAbsP1P0
dApFlag     DN D17.U8                   ;// same register as dAbsQ2Q0

;// Absolute differences (later overwritten by their compare results)
dAbsP0Q0    DN D13.U8                   ;// same register as dP0Cand
dAbsP1P0    DN D12.U8
dAbsQ1Q0    DN D18.U8
dAbsP2P0    DN D19.U8
dAbsQ2Q0    DN D17.U8

;// Boundary strengths in NEON form. Q13 is the D26:D27 pair, so the
;// widening move writes D27 as well before D27 is reused as a mask.
qBsWide     QN Q13.U16
dBsRaw      DN D26
dBsNonZero  DN D27
dBsLanes    DN D26.U16

;// Constants prepared before the loop
dConst0     DN D14.U8
dConst1     DN D15.U8
dConst4     DN D1.U16

;// Declared for D19 but not referenced by any instruction in this file
dScratch    DN D19.U8

;// Result registers
dP0Cand     DN D13.U8
dQ0Cand     DN D31.U8

dP0Out      DN D29.U8
dQ0Out      DN D24.U8


        ;// Function header. The "r9, d15" operands are interpreted by the
        ;// M_START macro from armCOMM_s.h (not visible in this file).
        M_START omxVCM4P10_FilterDeblockingChroma_HorEdge_I, r9, d15

        ;// Arguments on the stack
        M_ARG   ppThresholds, 4
        M_ARG   ppBS, 4

        ;// Step back three rows so that the first row load fetches P2.
        SUB         pSrcDst, pSrcDst, srcdstStep, LSL #1
        SUB         pSrcDst, pSrcDst, srcdstStep

        ;// Broadcast the first alpha and beta bytes to all lanes. The
        ;// writeback leaves each pointer on the following byte.
        VLD1        {dAlphaDup[]}, [pAlpha]!
        VLD1        {dBetaDup[]}, [pBeta]!

        M_LDR       pBS, ppBS
        M_LDR       pThresholds, ppThresholds

        LDR         passBits, =LOOP_COUNT
        LDR         rBsLow2, =MASK_3
        LDR         rBsBit2, =MASK_4

        ;// These constants are not read again in this file; presumably the
        ;// imported helpers rely on D14/D15 - TODO confirm against them.
        VMOV        dConst0, #0
        VMOV        dConst1, #1
        VMOV        dConst4, #4

        ;// Per-edge loop. The instruction order inside the loop is kept
        ;// exactly as originally scheduled.
EdgeLoop
        ;// Four bS bytes for this edge; skip 8 bytes to the next edge's set
        LDR         bsWord, [pBS], #8

        VLD1        dRowP2, [pSrcDst], srcdstStep
        VLD1        dRowP1, [pSrcDst], srcdstStep
        CMP         bsWord, #0                  ;// flags consumed by BEQ below
        VLD1        dRowP0, [pSrcDst], srcdstStep
        VLD1        dRowQ0, [pSrcDst], srcdstStep
        VABD        dAbsP2P0, dRowP2, dRowP0
        VLD1        dRowQ1, [pSrcDst], srcdstStep
        VABD        dAbsP0Q0, dRowP0, dRowQ0
        VLD1        dRowQ2, [pSrcDst], srcdstStep
        BEQ         EdgeSkipped                 ;// every bS is zero: nothing to filter

        VABD        dAbsP1P0, dRowP1, dRowP0
        VABD        dAbsQ1Q0, dRowQ1, dRowQ0

        ;// dFiltMask = (alpha > |p0-q0|)
        VCGT        dFiltMask, dAlphaDup, dAbsP0Q0
        VMOV.U32    dBsRaw[0], bsWord
        ;// dAbsP1P0 = max(|q1-q0|, |p1-p0|)
        VMAX        dAbsP1P0, dAbsQ1Q0, dAbsP1P0
        ;// Widen each bS byte to 16 bits: one bS value then spans two pixel bytes
        VMOVL       qBsWide, dBsRaw.U8
        VABD        dAbsQ2Q0, dRowQ2, dRowQ0
        ;// All-ones in every 16-bit lane whose bS is greater than zero
        VCGT        dBsNonZero.S16, dBsRaw.S16, #0

        ;// Replace the differences by their "beta > difference" masks
        VCGT        dAbsP1P0, dBetaDup, dAbsP1P0
        VCGT        dAbsP2P0, dBetaDup, dAbsP2P0

        VAND        dFiltMask, dBsNonZero.U8

        ;// NEON instructions leave the ARM flags alone, so this TST result
        ;// is still valid at the BLNE further down.
        TST         bsWord, rBsLow2

        VCGT        dAbsQ2Q0, dBetaDup, dAbsQ2Q0
        VAND        dFiltMask, dFiltMask, dAbsP1P0

        ;// dAqFlag overwrites D12 and dApFlag overwrites D17 only after
        ;// their previous contents have been consumed.
        VAND        dAqFlag, dFiltMask, dAbsQ2Q0
        VAND        dApFlag, dFiltMask, dAbsP2P0

        ;// bS < 4 filtering (called when any bS byte has one of its low two bits set)
        BLNE        armVCM4P10_DeblockingChromabSLT4_unsafe

        TST         bsWord, rBsBit2

        ;// Six rows were consumed; go back four so that pSrcDst is on P0.
        SUB         pSrcDst, pSrcDst, srcdstStep, LSL #2
        ;// All-ones in every 16-bit lane whose bS has bit 2 set
        VTST        dBsLanes, dBsLanes, dConst4

        ;// bS == 4 filtering (called when any bS byte has bit 2 set)
        BLNE        armVCM4P10_DeblockingChromabSGE4_unsafe

        ;// NOTE(review): dP0Out/dQ0Out and dP0Cand/dQ0Cand are not written
        ;// in this file before this point; presumably the two helpers
        ;// produce them - confirm against the helper source.
        ;// Lanes selected by dBsLanes take the candidate values ...
        VBIT        dP0Out, dP0Cand, dBsLanes
        VBIT        dQ0Out, dQ0Cand, dBsLanes

        ;// ... and lanes that failed the filter test get the loaded pixels back.
        VBIF        dP0Out, dRowP0, dFiltMask
        VBIF        dQ0Out, dRowQ0, dFiltMask

        ;// Store P0 and Q0; pSrcDst ends four rows below where this pass began.
        VST1        dP0Out, [pSrcDst], srcdstStep
        ADDS        passBits, passBits, passBits
        VST1        dQ0Out, [pSrcDst], srcdstStep

        BNE         EdgeLoop

        MOV         r0, #OMX_Sts_NoErr

        M_EXIT

        ;// Reached when the whole bS word of an edge is zero.
EdgeSkipped

        ;// Reload alpha/beta through the pointers advanced in the prologue.
        ;// NOTE(review): the filtered path above performs no such reload and
        ;// does not touch pThresholds in this file; presumably the helpers
        ;// take care of that - TODO confirm.
        VLD1        {dAlphaDup[]}, [pAlpha]
        ;// Six rows consumed, back two: same net four-row advance as the
        ;// filtered path.
        SUB         pSrcDst, pSrcDst, srcdstStep, LSL #1
        ADDS        passBits, passBits, passBits
        VLD1        {dBetaDup[]}, [pBeta]
        ;// Step over the four threshold bytes belonging to the skipped edge
        ADD         pThresholds, pThresholds, #4
        BNE         EdgeLoop

        MOV         r0, #OMX_Sts_NoErr
        M_END

        ENDIF


        END

203