;//
;// Copyright (C) 2007-2008 ARM Limited
;//
;// Licensed under the Apache License, Version 2.0 (the "License");
;// you may not use this file except in compliance with the License.
;// You may obtain a copy of the License at
;//
;//      http://www.apache.org/licenses/LICENSE-2.0
;//
;// Unless required by applicable law or agreed to in writing, software
;// distributed under the License is distributed on an "AS IS" BASIS,
;// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
;// See the License for the specific language governing permissions and
;// limitations under the License.
;//
;//
;//
;// File Name:  omxVCM4P10_FilterDeblockingChroma_HorEdge_I_s.s
;// OpenMAX DL: v1.0.2
;// Revision:   12290
;// Date:       Wednesday, April 9, 2008
;//
;//
;//
;//


28        INCLUDE omxtypes_s.h
29        INCLUDE armCOMM_s.h
30
31        M_VARIANTS CortexA8
32
33        IF CortexA8
34
35        IMPORT  armVCM4P10_DeblockingChromabSGE4_unsafe
36        IMPORT  armVCM4P10_DeblockingChromabSLT4_unsafe
37
38LOOP_COUNT  EQU 0x40000000
39MASK_3      EQU 0x03030303
40MASK_4      EQU 0x04040404
41
42;// Function arguments
43
44pSrcDst     RN 0
45srcdstStep  RN 1
46pAlpha      RN 2
47pBeta       RN 3
48
49pThresholds RN 5
50pBS         RN 4
51bS3210      RN 6
52
53;// Loop
54
55XY          RN 7
56
57;// Pixels
58dP_0        DN D4.U8
59dP_1        DN D5.U8
60dP_2        DN D6.U8
61dQ_0        DN D8.U8
62dQ_1        DN D9.U8
63dQ_2        DN D10.U8
64
65;// Filtering Decision
66dAlpha      DN D0.U8
67dBeta       DN D2.U8
68
69dFilt       DN D16.U8
70dAqflg      DN D12.U8
71dApflg      DN D17.U8
72
73dAp0q0      DN D13.U8
74dAp1p0      DN D12.U8
75dAq1q0      DN D18.U8
76dAp2p0      DN D19.U8
77dAq2q0      DN D17.U8
78
79qBS3210     QN Q13.U16
80dBS3210     DN D26
81dMask_bs    DN D27
82dFilt_bs    DN D26.U16
83
84;// bSLT4
85dMask_0     DN D14.U8
86dMask_1     DN D15.U8
87dMask_4     DN D1.U16
88
89Mask_4      RN 8
90Mask_3      RN 9
91
92dTemp       DN D19.U8
93
94;// Result
95dP_0t       DN D13.U8
96dQ_0t       DN D31.U8
97
98dP_0n       DN D29.U8
99dQ_0n       DN D24.U8
100
101
102        ;// Function header
103        M_START omxVCM4P10_FilterDeblockingChroma_HorEdge_I, r9, d15
104
105        ;//Arguments on the stack
106        M_ARG   ppThresholds, 4
107        M_ARG   ppBS, 4
108
109        ;// d0-dAlpha_0
110        ;// d2-dBeta_0
111
112        ;load alpha1,beta1 somewhere to avoid more loads
113        VLD1        {dAlpha[]}, [pAlpha]!
114        SUB         pSrcDst, pSrcDst, srcdstStep, LSL #1 ;?
115        SUB         pSrcDst, pSrcDst, srcdstStep
116        VLD1        {dBeta[]}, [pBeta]!
117
118        M_LDR       pBS, ppBS
119        M_LDR       pThresholds, ppThresholds
120
121        LDR         Mask_3, =MASK_3
122        LDR         Mask_4, =MASK_4
123
124        VMOV        dMask_0, #0
125        VMOV        dMask_1, #1
126        VMOV        dMask_4, #4
127
128        LDR         XY, =LOOP_COUNT
129
130        ;// p0-p3 - d4-d7
131        ;// q0-q3 - d8-d11
132LoopY
133        LDR         bS3210, [pBS], #8
134
135        VLD1        dP_2, [pSrcDst], srcdstStep
136        ;1
137        VLD1        dP_1, [pSrcDst], srcdstStep
138        CMP         bS3210, #0
139        VLD1        dP_0, [pSrcDst], srcdstStep
140        ;1
141        VLD1        dQ_0, [pSrcDst], srcdstStep
142        VABD        dAp2p0, dP_2, dP_0
143        VLD1        dQ_1, [pSrcDst], srcdstStep
144        VABD        dAp0q0, dP_0, dQ_0
145        VLD1        dQ_2, [pSrcDst], srcdstStep
146        BEQ         NoFilterBS0
147
148        VABD        dAp1p0, dP_1, dP_0
149        VABD        dAq1q0, dQ_1, dQ_0
150
151        VCGT        dFilt, dAlpha, dAp0q0
152        VMOV.U32    dBS3210[0], bS3210
153        VMAX        dAp1p0, dAq1q0, dAp1p0
154        VMOVL       qBS3210, dBS3210.U8
155        VABD        dAq2q0, dQ_2, dQ_0
156        VCGT        dMask_bs.S16, dBS3210.S16, #0
157
158        VCGT        dAp1p0, dBeta, dAp1p0
159        VCGT        dAp2p0, dBeta, dAp2p0
160
161        VAND        dFilt, dMask_bs.U8
162
163        TST         bS3210, Mask_3
164
165        VCGT        dAq2q0, dBeta, dAq2q0
166        VAND        dFilt, dFilt, dAp1p0
167
168        VAND        dAqflg, dFilt, dAq2q0
169        VAND        dApflg, dFilt, dAp2p0
170
171        ;// bS < 4 Filtering
172        BLNE        armVCM4P10_DeblockingChromabSLT4_unsafe
173
174        TST         bS3210, Mask_4
175
176        SUB         pSrcDst, pSrcDst, srcdstStep, LSL #2
177        VTST        dFilt_bs, dFilt_bs, dMask_4
178
179        ;// bS == 4 Filtering
180        BLNE        armVCM4P10_DeblockingChromabSGE4_unsafe
181
182        VBIT        dP_0n, dP_0t, dFilt_bs
183        VBIT        dQ_0n, dQ_0t, dFilt_bs
184
185        VBIF        dP_0n, dP_0, dFilt
186        VBIF        dQ_0n, dQ_0, dFilt
187
188        ;// Result Storage
189        VST1        dP_0n, [pSrcDst], srcdstStep
190        ADDS        XY, XY, XY
191        VST1        dQ_0n, [pSrcDst], srcdstStep
192
193        BNE         LoopY
194
195        MOV         r0, #OMX_Sts_NoErr
196
197        M_EXIT
198
199NoFilterBS0
200
201        VLD1        {dAlpha[]}, [pAlpha]
202        SUB         pSrcDst, pSrcDst, srcdstStep, LSL #1
203        ADDS        XY, XY, XY
204        VLD1        {dBeta[]}, [pBeta]
205        ADD         pThresholds, pThresholds, #4
206        BNE         LoopY
207
208        MOV         r0, #OMX_Sts_NoErr
209        M_END
210
211        ENDIF
212
213
214        END
215
216
217