;//
;// Copyright (C) 2007-2008 ARM Limited
;//
;// Licensed under the Apache License, Version 2.0 (the "License");
;// you may not use this file except in compliance with the License.
;// You may obtain a copy of the License at
;//
;//      http://www.apache.org/licenses/LICENSE-2.0
;//
;// Unless required by applicable law or agreed to in writing, software
;// distributed under the License is distributed on an "AS IS" BASIS,
;// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
;// See the License for the specific language governing permissions and
;// limitations under the License.
;//
;//
;//
;// File Name:  omxVCM4P10_FilterDeblockingChroma_VerEdge_I_s.s
;// OpenMAX DL: v1.0.2
;// Revision:   12290
;// Date:       Wednesday, April 9, 2008
;//
;//
;//
;//

        INCLUDE omxtypes_s.h
        INCLUDE armCOMM_s.h

        M_VARIANTS CortexA8

        IF CortexA8

        IMPORT  armVCM4P10_DeblockingChromabSGE4_unsafe
        IMPORT  armVCM4P10_DeblockingChromabSLT4_unsafe

LOOP_COUNT  EQU 0x40000000
MASK_3      EQU 0x03030303
MASK_4      EQU 0x04040404
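
;// MASK_3 and MASK_4 are per-byte masks over the four packed bS values:
;// TST with MASK_3 is non-zero if any bS is 1..3 (bS < 4 filtering needed),
;// TST with MASK_4 is non-zero if any bS equals 4 (bS == 4 filtering needed).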

;// Function arguments

pSrcDst     RN 0
srcdstStep  RN 1
pAlpha      RN 2
pBeta       RN 3

pThresholds RN 5
pBS         RN 4
bS3210      RN 6
pSrcDst_P   RN 10
pSrcDst_Q   RN 12

pTmp        RN 10
pTmp2       RN 12
step        RN 14

;// Loop

XY          RN 7

;// Rows input
dRow0       DN D7.U8
dRow1       DN D8.U8
dRow2       DN D5.U8
dRow3       DN D10.U8
dRow4       DN D6.U8
dRow5       DN D9.U8
dRow6       DN D4.U8
dRow7       DN D11.U8


;// Pixels
dP_0        DN D4.U8
dP_1        DN D5.U8
dP_2        DN D6.U8
dQ_0        DN D8.U8
dQ_1        DN D9.U8
dQ_2        DN D10.U8

;// Filtering Decision
dAlpha      DN D0.U8
dBeta       DN D2.U8

dFilt       DN D16.U8
dAqflg      DN D12.U8
dApflg      DN D17.U8

dAp0q0      DN D13.U8
dAp1p0      DN D12.U8
dAq1q0      DN D18.U8
dAp2p0      DN D19.U8
dAq2q0      DN D17.U8

qBS3210     QN Q13.U16
dBS3210     DN D26
dMask_bs    DN D27
dFilt_bs    DN D26.U16

;// bSLT4
dMask_0     DN D14.U8
dMask_1     DN D15.U8
dMask_4     DN D1.U16

Mask_4      RN 8
Mask_3      RN 9

dTemp       DN D19.U8

;// Result
dP_0t       DN D13.U8
dQ_0t       DN D31.U8

dP_0n       DN D29.U8
dQ_0n       DN D24.U8


        ;// Function header
        M_START omxVCM4P10_FilterDeblockingChroma_VerEdge_I, r12, d15

        ;// Arguments on the stack
        M_ARG   ppThresholds, 4
        M_ARG   ppBS, 4

        ;// d0-dAlpha_0
        ;// d2-dBeta_0

        ;// Note: alpha1 and beta1 could also be loaded here to avoid more loads later
        VLD1        {dAlpha[]}, [pAlpha]!
        SUB         pSrcDst, pSrcDst, #4
        VLD1        {dBeta[]}, [pBeta]!
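        ;// pSrcDst now points 4 bytes left of the first vertical edge, so each
        ;// 8-byte row load covers p3..p0 followed by q0..q3.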

        M_LDR       pBS, ppBS
        M_LDR       pThresholds, ppThresholds

        LDR         Mask_4, =MASK_4
        LDR         Mask_3, =MASK_3

        ;// dMask_0 = d14
        ;// dMask_1 = d15
        ;// dMask_4 = d1

        VMOV        dMask_0, #0
        VMOV        dMask_1, #1
        VMOV        dMask_4, #4

        LDR         XY, =LOOP_COUNT
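        ;// XY = LOOP_COUNT is doubled by ADDS at the end of each pass, so the
        ;// Z flag is set only after the second pass: LoopY runs exactly twice,
        ;// once per vertical edge of the chroma block.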

        ;// p0-p3 - d4-d7
        ;// q0-q3 - d8-d11


LoopY
        LDR         bS3210, [pBS], #8
        ADD         pTmp, pSrcDst, srcdstStep
        ADD         step, srcdstStep, srcdstStep
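        ;// pSrcDst walks the even rows and pTmp the odd rows, both stepping by
        ;// two row strides, giving two independent load address streams.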

        ;1
        VLD1        dRow0, [pSrcDst], step
        ;1
        VLD1        dRow1, [pTmp], step
        VLD1        dRow2, [pSrcDst], step
        VLD1        dRow3, [pTmp], step
        VLD1        dRow4, [pSrcDst], step
        VLD1        dRow5, [pTmp], step
        VLD1        dRow6, [pSrcDst], step
        VLD1        dRow7, [pTmp], step


        ;// dRow0 = [q3r0 q2r0 q1r0 q0r0 p0r0 p1r0 p2r0 p3r0]
        ;// dRow1 = [q3r1 q2r1 q1r1 q0r1 p0r1 p1r1 p2r1 p3r1]
        ;// dRow2 = [q3r2 q2r2 q1r2 q0r2 p0r2 p1r2 p2r2 p3r2]
        ;// dRow3 = [q3r3 q2r3 q1r3 q0r3 p0r3 p1r3 p2r3 p3r3]
        ;// dRow4 = [q3r4 q2r4 q1r4 q0r4 p0r4 p1r4 p2r4 p3r4]
        ;// dRow5 = [q3r5 q2r5 q1r5 q0r5 p0r5 p1r5 p2r5 p3r5]
        ;// dRow6 = [q3r6 q2r6 q1r6 q0r6 p0r6 p1r6 p2r6 p3r6]
        ;// dRow7 = [q3r7 q2r7 q1r7 q0r7 p0r7 p1r7 p2r7 p3r7]

        ;// 8x8 Transpose
        VZIP.8      dRow0, dRow1
        VZIP.8      dRow2, dRow3
        VZIP.8      dRow4, dRow5
        VZIP.8      dRow6, dRow7

        VZIP.16     dRow0, dRow2
        VZIP.16     dRow1, dRow3
        VZIP.16     dRow4, dRow6
        VZIP.16     dRow5, dRow7

        VZIP.32     dRow0, dRow4
        VZIP.32     dRow2, dRow6
        VZIP.32     dRow3, dRow7
        VZIP.32     dRow1, dRow5
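        ;// After the three VZIP stages each D register holds one column for all
        ;// eight rows (lane i = row i): d4-d7 = p0-p3, d8-d11 = q0-q3, matching
        ;// the dP_x/dQ_x aliases above.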


        ;Realign the pointers

        CMP         bS3210, #0
        VABD        dAp2p0, dP_2, dP_0
        VABD        dAp0q0, dP_0, dQ_0
        BEQ         NoFilterBS0

        VABD        dAp1p0, dP_1, dP_0
        VABD        dAq1q0, dQ_1, dQ_0

        VMOV.U32    dBS3210[0], bS3210
        VCGT        dFilt, dAlpha, dAp0q0
        VMAX        dAp1p0, dAq1q0, dAp1p0
        VMOVL       qBS3210, dBS3210.U8
        VABD        dAq2q0, dQ_2, dQ_0
        VCGT        dMask_bs.S16, dBS3210.S16, #0

        VCGT        dAp1p0, dBeta, dAp1p0
        VCGT        dAp2p0, dBeta, dAp2p0
        VAND        dFilt, dMask_bs.U8

        TST         bS3210, Mask_3

        VCGT        dAq2q0, dBeta, dAq2q0
        VAND        dFilt, dFilt, dAp1p0

        VAND        dAqflg, dFilt, dAq2q0
        VAND        dApflg, dFilt, dAp2p0
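
        ;// dFilt now holds the per-pixel filter condition:
        ;//   |p0-q0| < alpha  AND  max(|p1-p0|, |q1-q0|) < beta  AND  bS > 0,
        ;// where each bS byte, widened to a halfword mask, covers two rows.
        ;// dApflg/dAqflg additionally require |p2-p0| < beta and |q2-q0| < beta.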

        ;// bS < 4 Filtering
        BLNE        armVCM4P10_DeblockingChromabSLT4_unsafe

        TST         bS3210, Mask_4

        SUB         pSrcDst, pSrcDst, srcdstStep, LSL #3
        VTST        dFilt_bs, dFilt_bs, dMask_4
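        ;// The SUB rewinds pSrcDst over the eight rows advanced by the loads.
        ;// VTST sets each halfword lane of dFilt_bs to all-ones where the
        ;// corresponding bS value has bit 2 set, i.e. where bS == 4.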

        ;// bS == 4 Filtering
        BLNE        armVCM4P10_DeblockingChromabSGE4_unsafe

        VBIT        dP_0n, dP_0t, dFilt_bs
        VBIT        dQ_0n, dQ_0t, dFilt_bs
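        ;// Where dFilt_bs is set (bS == 4) take dP_0t/dQ_0t, otherwise keep
        ;// dP_0n/dQ_0n.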

        ;// Result Storage
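        ;// The VBIFs below restore the unfiltered p0/q0 wherever dFilt is clear.
        ;// The p0 column is 3 bytes into each row and the q0 column 4 bytes in;
        ;// both are written back one lane (row) at a time through two pointers
        ;// stepping by two rows each.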
        ADD         pSrcDst_P, pSrcDst, #3
        VBIF        dP_0n, dP_0, dFilt

        ADD         pTmp2, pSrcDst_P, srcdstStep
        ADD         step, srcdstStep, srcdstStep
        VBIF        dQ_0n, dQ_0, dFilt

        ADDS        XY, XY, XY

        VST1        {dP_0n[0]}, [pSrcDst_P], step
        VST1        {dP_0n[1]}, [pTmp2], step
        VST1        {dP_0n[2]}, [pSrcDst_P], step
        VST1        {dP_0n[3]}, [pTmp2], step
        VST1        {dP_0n[4]}, [pSrcDst_P], step
        VST1        {dP_0n[5]}, [pTmp2], step
        VST1        {dP_0n[6]}, [pSrcDst_P], step
        VST1        {dP_0n[7]}, [pTmp2], step

        ADD         pSrcDst_Q, pSrcDst, #4
        ADD         pTmp, pSrcDst_Q, srcdstStep

        VST1        {dQ_0n[0]}, [pSrcDst_Q], step
        VST1        {dQ_0n[1]}, [pTmp], step
        VST1        {dQ_0n[2]}, [pSrcDst_Q], step
        VST1        {dQ_0n[3]}, [pTmp], step
        VST1        {dQ_0n[4]}, [pSrcDst_Q], step
        VST1        {dQ_0n[5]}, [pTmp], step
        VST1        {dQ_0n[6]}, [pSrcDst_Q], step
        VST1        {dQ_0n[7]}, [pTmp], step

        ADD         pSrcDst, pSrcDst, #4

        BNE         LoopY

        MOV         r0, #OMX_Sts_NoErr

        M_EXIT

NoFilterBS0
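        ;// All four bS values for this edge are zero, so its pixels are left
        ;// untouched: rewind the row pointer, step 4 bytes right to the next
        ;// vertical edge, reload alpha/beta (the second, internal-edge values)
        ;// and skip this edge's four threshold entries.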
        VLD1        {dAlpha[]}, [pAlpha]
        ADD         pSrcDst, pSrcDst, #4
        SUB         pSrcDst, pSrcDst, srcdstStep, LSL #3
        ADDS        XY, XY, XY
        VLD1        {dBeta[]}, [pBeta]
        ADD         pThresholds, pThresholds, #4
        BNE         LoopY

        MOV         r0, #OMX_Sts_NoErr

        M_END

        ENDIF


        END

