armVCM4P10_DeblockingChroma_unsafe_s.s revision 78e52bfac041d71ce53b5b13c2abf78af742b09d
;//
;// Copyright (C) 2007-2008 ARM Limited
;//
;// Licensed under the Apache License, Version 2.0 (the "License");
;// you may not use this file except in compliance with the License.
;// You may obtain a copy of the License at
;//
;//      http://www.apache.org/licenses/LICENSE-2.0
;//
;// Unless required by applicable law or agreed to in writing, software
;// distributed under the License is distributed on an "AS IS" BASIS,
;// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
;// See the License for the specific language governing permissions and
;// limitations under the License.
;//
;//
;//
;// File Name:  armVCM4P10_DeblockingChroma_unsafe_s.s
;// OpenMAX DL: v1.0.2
;// Revision:   12290
;// Date:       Wednesday, April 9, 2008
;//
;//
;//
;//

        INCLUDE omxtypes_s.h
        INCLUDE armCOMM_s.h

        M_VARIANTS CortexA8


        IF CortexA8

;// Core registers. pBS and bS3210 are declared here but are not
;// referenced by the two routines in this file.
pAlpha      RN 2
pBeta       RN 3

pThresholds RN 5
pBS         RN 4
bS3210      RN 6

;// Pixels (the routines in this file only read p0, p1, q0 and q1)
dP_0        DN D4.U8
dP_1        DN D5.U8
dP_2        DN D6.U8
dP_3        DN D7.U8
dQ_0        DN D8.U8
dQ_1        DN D9.U8
dQ_2        DN D10.U8
dQ_3        DN D11.U8


;// Filtering Decision
dAlpha      DN D0.U8
dBeta       DN D2.U8

dFilt       DN D16.U8
dAqflg      DN D12.U8
dApflg      DN D17.U8

dAp0q0      DN D13.U8

;// bSLT4
dTC3210     DN D18.U8
dTCs        DN D31.S8               ;// signed view of the same register as dTC
dTC         DN D31.U8

dMask_0     DN D14.U8
dMask_1     DN D15.U8
dMask_4     DN D26.U16

dTemp       DN D28.U8
dDummy      DN D17.U8               ;// same register as dApflg

;// Computing P0,Q0
qDq0p0      QN Q10.S16
qDp1q1      QN Q11.S16
qDelta      QN Q10.S16              ;// reuse qDq0p0
dDelta      DN D20.S8               ;// low half of Q10

;// New P0/Q0 values: 16-bit working copies and their 8-bit results
qP_0n       QN Q14.S16              ;// Q14 = D28:D29, so it overlaps dTemp
qQ_0n       QN Q12.S16              ;// Q12 = D24:D25

dQ_0n       DN D24.U8
dP_0n       DN D29.U8

;// bSGE4

dHSp0q1     DN D13.U8
dHSq0p1     DN D31.U8

dBS3210     DN D28.U16

dP_0t       DN D13.U8               ;// same register as dHSp0q1
dQ_0t       DN D31.U8               ;// same register as dHSq0p1

;// dP_0n (D29.U8) and dQ_0n (D24.U8) are declared once, above; the bSGE4
;// section used to repeat both declarations with identical values.

;// Register usage for - armVCM4P10_DeblockingChromabSLT4_unsafe
;//
;// Inputs   - Pixels             - p0-p3: D4-D7, q0-q3: D8-D11
;//          -
;//            Filter masks       - filt: D16, aqflg: D12, apflg: D17
;//          - Additional Params  - pThresholds: r5
;//
;// Outputs  - Pixels             - P0: D29, Q0: D24
;//          - Additional Params  - pThresholds: r5 (post-incremented by the load)
;//
;// Registers Corrupted - D18-D31 is the range reserved by the original header;
;//                       the code below writes D18, D20-D25, D28, D29 and D31,
;//                       and also loads D0 (alpha) and D2 (beta).
;//
;// What the routine below actually uses and produces:
;//   reads    p0/p1 (D4/D5), q0/q1 (D8/D9), dFilt (D16), dMask_0 (D14),
;//            dMask_1 (D15), and the pointers pAlpha (r2), pBeta (r3),
;//            pThresholds (r5). aqflg/apflg are not read here.
;//   computes delta = (((q0 - p0) << 2) + (p1 - q1) + 4) >> 3,
;//            clamped to [-tc, +tc] with tc = tc0 + dMask_1
;//            P0' = sat_u8(p0 + delta)   -> dP_0n (D29)
;//            Q0' = sat_u8(q0 - delta)   -> dQ_0n (D24)
;//   NOTE(review): dMask_0 is presumably all zeroes and dMask_1 all ones
;//   (0x01 per byte), set up by the caller - confirm against the call site.


        M_START armVCM4P10_DeblockingChromabSLT4_unsafe


        ;// Load four tC0 bytes into the low word of D18 (dTC3210) and step
        ;// pThresholds past them.
        VLD1        d18.U32[0], [pThresholds]!

        ;// delta = (((q0 - p0) << 2) + (p1 - q1) + 4) >> 3
        ;//       = (((p1 - q1) >> 2) + (q0 - p0) + 1) >> 1
        ;// The second form is what is evaluated: VSHR by 2 on (p1 - q1),
        ;// then a rounding halving add with (q0 - p0).
        ;//
        ;// The tC0 handling is interleaved with the delta arithmetic
        ;// (presumably for instruction scheduling - keep the order as is).
        ;// VMOV + VZIP.8 duplicate every tC0 byte into two adjacent lanes.

        ;// qDp1q1 - Q11
        ;// qDq0p0 - Q10
        VSUBL       qDp1q1, dP_1, dQ_1          ;// p1 - q1, widened to 16 bit
        VMOV        dTemp, dTC3210              ;// copy of the tC0 bytes
        VSUBL       qDq0p0, dQ_0, dP_0          ;// q0 - p0, widened to 16 bit
        VSHR        qDp1q1, qDp1q1, #2          ;// (p1 - q1) >> 2
        VZIP.8      dTC3210, dTemp              ;// D18 = t0,t0,t1,t1,t2,t2,t3,t3

        ;// qDelta shares Q10 with qDq0p0

        ;// tc = tC0 + dMask_1, with tC0 replaced by dMask_0 in every lane
        ;// whose dFilt bits are clear

        ;// dTC3210 - D18
        ;// dTemp   - D28
        ;// dTC     - D31
        VBIF        dTC3210, dMask_0, dFilt     ;// take dMask_0 where dFilt is false
        VRHADD      qDelta, qDp1q1, qDq0p0      ;// unclamped delta (16 bit)
        VADD        dTC, dTC3210, dMask_1       ;// tc
        VQMOVN      dDelta, qDelta              ;// saturate delta to signed 8 bit
        ;// dDelta - D20

        ;// delta = armClip(-tc, tc, delta)
        ;// The alpha/beta loads (one byte each, replicated to all lanes of
        ;// D0/D2) are slotted in between the clamp instructions.
        VLD1        {dAlpha[]}, [pAlpha]
        VMIN        dDelta, dDelta, dTCs        ;// delta = min(delta, tc)
        VNEG        dTCs, dTCs                  ;// D31 now holds -tc
        VLD1        {dBeta[]}, [pBeta]
        VMAX        dDelta, dDelta, dTCs        ;// delta = max(delta, -tc)

        ;// dP_0n - D29
        ;// dQ_0n - D24

        ;// dP_0n = (OMX_U8)armClip(0, 255, dP_0 + dDelta);
        ;// dQ_0n = (OMX_U8)armClip(0, 255, dQ_0 - dDelta);

        ;// qP_0n - Q14
        ;// qQ_0n - Q12

        VMOVL       qP_0n, dP_0                 ;// widen p0 to 16 bit
        VMOVL       qQ_0n, dQ_0                 ;// widen q0 to 16 bit

        VADDW       qP_0n, qP_0n, dDelta        ;// p0 + delta
        VSUBW       qQ_0n, qQ_0n, dDelta        ;// q0 - delta

        VQMOVUN     dP_0n, qP_0n                ;// saturate to unsigned 8 bit
        VQMOVUN     dQ_0n, qQ_0n

        M_END

;// Register usage for - armVCM4P10_DeblockingChromabSGE4_unsafe()
;//
;// Inputs   - Pixels             - p0-p3: D4-D7, q0-q3: D8-D11
;//                                 (only p0, p1, q0 and q1 are read)
;//          - Additional Params  - pAlpha: r2, pBeta: r3, pThresholds: r5
;//
;// Outputs  - Pixels             - P0: D13, Q0: D31
;//          - Additional Params  - alpha: D0, beta: D2 (loaded by the routine)

;// Registers
;//   Corrupted - D0, D2, D13 and D31 are the only NEON registers written by
;//               armVCM4P10_DeblockingChromabSGE4_unsafe; r5 (pThresholds)
;//               is advanced by 4.
;//
;// What the routine below computes:
;//   P0' = (2*p1 + p0 + q1 + 2) >> 2    -> dP_0t (D13)
;//   Q0' = (2*q1 + q0 + p1 + 2) >> 2    -> dQ_0t (D31)
;// Each result is formed as a truncating halving add followed by a rounding
;// halving add, which yields the same value as the expressions above.
;// It also loads alpha into D0 and beta into D2, each as one byte replicated
;// to all eight lanes.

        M_START armVCM4P10_DeblockingChromabSGE4_unsafe

        ;// dHSq0p1 - D31
        ;// dHSp0q1 - D13
        VHADD       dHSp0q1, dP_0, dQ_1         ;// (p0 + q1) >> 1
        VHADD       dHSq0p1, dQ_0, dP_1         ;// (q0 + p1) >> 1

        ;// dP_0t reuses D13 (dHSp0q1), dQ_0t reuses D31 (dHSq0p1)

        ;// Load alpha and beta, and step pThresholds forward by 4 bytes
        ;// without reading them (presumably so the pointer advances the
        ;// same way as in the bSLT4 routine, which loads 4 threshold
        ;// bytes - confirm against the caller).
        VLD1        {dAlpha[]}, [pAlpha]
        ADD         pThresholds, pThresholds, #4
        VLD1        {dBeta[]}, [pBeta]

        VRHADD      dP_0t, dHSp0q1, dP_1        ;// (((p0 + q1) >> 1) + p1 + 1) >> 1
        VRHADD      dQ_0t, dHSq0p1, dQ_1        ;// (((q0 + p1) >> 1) + q1 + 1) >> 1

        M_END

        ENDIF

        END