omxVCM4P10_FilterDeblockingLuma_HorEdge_I_s.s revision 0c1bc742181ded4930842b46e9507372f0b1b963
1;//
2;//
3;// File Name:  omxVCM4P10_FilterDeblockingLuma_HorEdge_I_s.s
4;// OpenMAX DL: v1.0.2
5;// Revision:   12290
6;// Date:       Wednesday, April 9, 2008
7;//
8;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
9;//
10;//
11;//
12
13        INCLUDE omxtypes_s.h
14        INCLUDE armCOMM_s.h
15
16        M_VARIANTS CortexA8
17
18        IMPORT  armVCM4P10_DeblockingLumabSLT4_unsafe
19        IMPORT  armVCM4P10_DeblockingLumabSGE4_unsafe
20
21        IF CortexA8
22
;// Loop control word. It is loaded into XY and doubled (ADDS XY, XY, XY)
;// once per 8-pixel group. With the bit pattern 0101 0101 in the top byte,
;// the carry flag is set on every second doubling (which ends LoopX) and the
;// register becomes zero on the eighth doubling (which ends LoopY): four
;// outer passes of two inner iterations each.
23LOOP_COUNT  EQU 0x55000000
24
25
26;// Function arguments
;// r0-r3 hold the first four arguments on entry.
27
28pSrcDst     RN 0
29srcdstStep  RN 1
30pAlpha      RN 2
31pBeta       RN 3
32
;// pThresholds and pBS are loaded from the stack arguments ppThresholds and
;// ppBS. bS10 receives one halfword read from pBS; its low and high bytes
;// are tested separately by the function body.
33pThresholds RN 5
34pBS         RN 4
35bS10        RN 12
36
;// pAlpha_0/pBeta_0 are the same registers as pAlpha/pBeta (element [0]).
37pAlpha_0    RN 2
38pBeta_0     RN 3
39
;// pAlpha_1/pBeta_1 are set to pAlpha_0 + 1 and pBeta_0 + 1 (element [1]).
40pAlpha_1    RN 7
41pBeta_1     RN 8
42
43
44
45;// Loop
;// XY holds the LOOP_COUNT shift pattern; pTmp is a second row pointer
;// (pSrcDst + srcdstStep) used to interleave loads and to rewind pSrcDst;
;// step is set to 2 * srcdstStep.
46
47XY          RN 9
48
49pTmp        RN 6
50step        RN 10
51
52;// Pixels
;// Eight 8-bit pixels per D register. The function body loads dP_3..dP_0 and
;// dQ_0..dQ_3 from eight consecutive rows, in that order.
53dP_0        DN D4.U8
54dP_1        DN D5.U8
55dP_2        DN D6.U8
56dP_3        DN D7.U8
57dQ_0        DN D8.U8
58dQ_1        DN D9.U8
59dQ_2        DN D10.U8
60dQ_3        DN D11.U8
61
62
63;// Filtering Decision
;// dAlpha/dBeta hold one threshold byte replicated across all lanes.
64dAlpha      DN D0.U8
65dBeta       DN D2.U8
66
;// NOTE: several aliases below share a physical register on purpose:
;//   dAqflg and dAp1p0 are both D12, dApflg and dAq2q0 are both D17.
;// The function body consumes the old value before writing the new one, so
;// the order of the instructions that use them must not be changed.
67dFilt       DN D16.U8
68dAqflg      DN D12.U8
69dApflg      DN D17.U8
70
71dAp0q0      DN D13.U8
72dAp1p0      DN D12.U8
73dAq1q0      DN D18.U8
74dAp2p0      DN D19.U8
75dAq2q0      DN D17.U8
76
77;// bSLT4
;// The dTC*/dTemp aliases are not referenced by the code in this file;
;// presumably they mirror the register usage of the imported bSLT4 helper
;// (TODO confirm). D18 and D19 are also used above for dAq1q0 and dAp2p0.
78dTC0        DN D18.U8
79dTC1        DN D19.U8
80dTC01       DN D18.U8
81
82dTCs        DN D31.S8
83dTC         DN D31.U8
84
;// dMask_0/dMask_1 are initialised to 0 and 1 by the function body but are
;// not read in this file; presumably the helpers consume them (TODO confirm).
85dMask_0     DN D14.U8
86dMask_1     DN D15.U8
87
;// Mask_0 is a core register (r11), set to 0 and used to clear halves of
;// dFilt.
88Mask_0      RN 11
89
90dTemp       DN D19.U8
91
92;// Computing P0,Q0
;// Not referenced by the code in this file. Q10 is the D20/D21 pair, so
;// dDelta (D20) is the low half of qDelta/qDq0p0.
93qDq0p0      QN Q10.S16
94qDp1q1      QN Q11.S16
95qDelta      QN Q10.S16  ; reuse qDq0p0
96dDelta      DN D20.S8
97
98
99;// Computing P1,Q1
;// Only dQ_0n, dQ_1n, dP_0n and dP_1n from this group are used by the code
;// in this file: they are the registers stored back to memory after the
;// helper calls.
100dRp0q0      DN D24.U8
101
102dMaxP       DN D23.U8
103dMinP       DN D22.U8
104
105dMaxQ       DN D19.U8
106dMinQ       DN D21.U8
107
108dDeltaP     DN D26.U8
109dDeltaQ     DN D27.U8
110
111qP_0n       QN Q14.S16
112qQ_0n       QN Q12.S16
113
114dQ_0n       DN D24.U8
115dQ_1n       DN D25.U8
116dP_0n       DN D29.U8
117dP_1n       DN D30.U8
118
119;// bSGE4
;// Working registers for the bS >= 4 path. Apart from the dP_*n / dQ_*n
;// result registers they are not referenced by the code in this file;
;// presumably they mirror the imported bSGE4 helper (TODO confirm).
120
121qSp0q0      QN Q10.U16
122
123qSp2q1      QN Q11.U16
124qSp0q0p1    QN Q12.U16
125qSp3p2      QN Q13.U16
126dHSp0q1     DN D28.U8
127
128qSq2p1      QN Q11.U16
129qSp0q0q1    QN Q12.U16
130qSq3q2      QN Q13.U16  ;!!
131dHSq0p1     DN D28.U8   ;!!
132
133qTemp1      QN Q11.U16  ;!!;qSp2q1
134qTemp2      QN Q12.U16  ;!!;qSp0q0p1
135
136dP_0t       DN D28.U8   ;!!;dHSp0q1
137dQ_0t       DN D22.U8   ;!!;Temp1
138
;// dP_0n, dP_1n, dQ_0n and dQ_1n repeat the definitions given above with the
;// same registers. dP_2n shares D31 with dTC/dTCs, and dQ_2n shares D28 with
;// dP_0t/dHSp0q1/dHSq0p1.
139dP_0n       DN D29.U8
140dP_1n       DN D30.U8
141dP_2n       DN D31.U8
142
143dQ_0n       DN D24.U8   ;!!;Temp2
144dQ_1n       DN D25.U8   ;!!;Temp2
145dQ_2n       DN D28.U8   ;!!;dQ_0t
146
147
148        ;// Function header
           ;//
           ;// omxVCM4P10_FilterDeblockingLuma_HorEdge_I
           ;//
           ;// Filters four horizontal edges in place, 16 pixels wide. The
           ;// edges lie at pSrcDst, pSrcDst + 4*srcdstStep, + 8*srcdstStep and
           ;// + 12*srcdstStep. Each edge is handled as two 8-pixel groups
           ;// (LoopX) and the four edges are walked by LoopY.
           ;//
           ;// In:   r0 = pSrcDst     pointer to the first row below the first edge
           ;//       r1 = srcdstStep  distance in bytes between rows
           ;//       r2 = pAlpha      two bytes: [0] used for the first edge,
           ;//                        [1] for the remaining three
           ;//       r3 = pBeta       two bytes, used like pAlpha
           ;//       stack: ppThresholds, ppBS (fetched with M_LDR)
           ;//
           ;// pBS is read one halfword (two bytes) per 8-pixel group, 16 bytes
           ;// in total. Each byte gates four pixels: a zero byte disables
           ;// filtering of its four pixels. Bit 2 of the low byte selects the
           ;// bSGE4 path for the whole 8-pixel group.
           ;//
           ;// Out:  r0 = OMX_Sts_NoErr (the only value returned; the code in
           ;//       this file performs no argument validation).
           ;//
           ;// M_START/M_ARG/M_LDR/M_END are macros from armCOMM_s.h and are not
           ;// visible here; "r11, d15" presumably requests preservation of the
           ;// callee-saved registers up to r11 and d15 (TODO confirm).
149        M_START omxVCM4P10_FilterDeblockingLuma_HorEdge_I, r11, d15
150
151        ;//Arguments on the stack
152        M_ARG   ppThresholds, 4
153        M_ARG   ppBS, 4
154
155        ;// d0-dAlpha_0
156        ;// d2-dBeta_0
157
           ;// Keep pointers to the second alpha/beta byte for the later edges.
158        ADD         pAlpha_1, pAlpha_0, #1
159        ADD         pBeta_1, pBeta_0, #1
160
           ;// Load alpha[0]/beta[0] replicated to every lane, and move
           ;// pSrcDst up four rows so that it points at the p3 row.
161        VLD1        {dAlpha[]}, [pAlpha_0]
162        SUB         pSrcDst, pSrcDst, srcdstStep, LSL #2
163        VLD1        {dBeta[]}, [pBeta_0]
164
165        M_LDR       pBS, ppBS
166        M_LDR       pThresholds, ppThresholds
167
           ;// Zero word used to clear one 32-bit half of dFilt.
168        MOV         Mask_0,#0
169
170        ;dMask_0-14
171        ;dMask_1-15
172
           ;// Not read again in this file; presumably inputs to the helpers.
173        VMOV        dMask_0, #0
174        VMOV        dMask_1, #1
175
           ;// step = 2 * srcdstStep: pSrcDst and pTmp each advance two rows
           ;// per load so that the eight row loads alternate between them.
176        ADD         step, srcdstStep, srcdstStep
177
178        LDR         XY,=LOOP_COUNT
179
180        ;// p0-p3 - d4-d7
181        ;// q0-q3 - d8-d11
182LoopY
183LoopX
           ;// Fetch the two bS bytes for this 8-pixel group; if both are
           ;// zero the group is skipped without loading any pixels.
184        LDRH        bS10, [pBS], #2
185        ADD         pTmp, pSrcDst, srcdstStep
186        CMP         bS10, #0
187        BEQ         NoFilterBS0
188
           ;// Load rows p3..q3 (pSrcDst takes the even rows, pTmp the odd
           ;// rows) interleaved with the absolute-difference computations.
           ;// After the dQ_2 load pSrcDst points at the q3 row, i.e. seven
           ;// rows below the p3 row.
189        VLD1        dP_3, [pSrcDst], step
190        VLD1        dP_2, [pTmp], step
191        VLD1        dP_1, [pSrcDst], step
192        VLD1        dP_0, [pTmp], step
193        VLD1        dQ_0, [pSrcDst], step
194        VABD        dAp1p0, dP_0, dP_1
195        VLD1        dQ_1, [pTmp]
196        VABD        dAp0q0, dQ_0, dP_0
197        VLD1        dQ_2, [pSrcDst], srcdstStep
198
199        VABD        dAq1q0, dQ_1, dQ_0
200        VABD        dAp2p0, dP_2, dP_0
           ;// dFilt = alpha > |p0 - q0|
201        VCGT        dFilt, dAlpha, dAp0q0
202
           ;// Flags from this TST are consumed by the VMOVEQ below; the NEON
           ;// instructions in between do not modify the flags.
203        TST         bS10, #0xff
           ;// dAp1p0 = max(|q1 - q0|, |p1 - p0|)
204        VMAX        dAp1p0, dAq1q0, dAp1p0
205        VABD        dAq2q0, dQ_2, dQ_0
206
           ;// Low bS byte is zero: disable filtering for pixels 0..3.
207        VMOVEQ.U32  dFilt[0], Mask_0
208        TST         bS10, #0xff00
209
           ;// Turn the differences into masks: beta > difference.
210        VCGT        dAp2p0, dBeta, dAp2p0
211        VCGT        dAp1p0, dBeta, dAp1p0
212
           ;// High bS byte is zero: disable filtering for pixels 4..7.
213        VMOVEQ.U32  dFilt[1], Mask_0
214
215        VCGT        dAq2q0, dBeta, dAq2q0
216        VLD1        dQ_3, [pSrcDst]
           ;// dFilt now combines the alpha test, both beta tests and the bS
           ;// gating.
217        VAND        dFilt, dFilt, dAp1p0
           ;// Flags from this TST are consumed by the BNE below.
218        TST         bS10, #4
219
           ;// Order matters here: dAqflg overwrites D12 (dAp1p0, already
           ;// folded into dFilt) and dApflg overwrites D17 (dAq2q0, which is
           ;// read by the first VAND).
220        VAND        dAqflg, dFilt, dAq2q0
221        VAND        dApflg, dFilt, dAp2p0
222
223        BNE         bSGE4
224bSLT4
225        ;// bS < 4 Filtering
           ;// Rewind pSrcDst by five rows, from the q3 row to the p1 row.
226        SUB         pSrcDst, pSrcDst, srcdstStep, LSL #2
227        SUB         pSrcDst, pSrcDst, srcdstStep
228
           ;// The helper is defined elsewhere. The stores below expect its
           ;// results in dP_1n, dP_0n, dQ_0n and dQ_1n. pThresholds is not
           ;// advanced on this path in this file; presumably the helper
           ;// consumes and advances it (TODO confirm).
229        BL          armVCM4P10_DeblockingLumabSLT4_unsafe
230
231        ;// Result Storage
           ;// Write p1, p0, q0, q1. pTmp is set to the p3 row so that
           ;// pSrcDst can be moved to the next 8-pixel group afterwards.
232        VST1        dP_1n, [pSrcDst], srcdstStep
233        VST1        dP_0n, [pSrcDst], srcdstStep
234        SUB         pTmp, pSrcDst, srcdstStep, LSL #2
235        VST1        dQ_0n, [pSrcDst], srcdstStep
           ;// Advance the loop pattern; C and Z are used by the BCC below
           ;// and by the BNE at ExitLoopY.
236        ADDS        XY, XY, XY
237        VST1        dQ_1n, [pSrcDst]
238        ADD         pSrcDst, pTmp, #8
239
240        BCC         LoopX
241        B           ExitLoopY
242
243NoFilterBS0
           ;// Both bS bytes are zero: step to the next 8-pixel group, advance
           ;// the loop pattern and skip two threshold bytes.
244        ADD         pSrcDst, pSrcDst, #8
245        ADDS        XY, XY, XY
246        ADD         pThresholds, pThresholds, #2
247        BCC         LoopX
248        B           ExitLoopY
249bSGE4
250        ;// bS >= 4 Filtering
           ;// Rewind pSrcDst by six rows, from the q3 row to the p2 row.
251        SUB         pSrcDst, pSrcDst, srcdstStep, LSL #2
252        SUB         pSrcDst, pSrcDst, srcdstStep, LSL #1
           ;// The helper is defined elsewhere. The stores below expect its
           ;// results in dP_2n, dP_1n, dP_0n, dQ_0n, dQ_1n and dQ_2n.
253        BL          armVCM4P10_DeblockingLumabSGE4_unsafe
254
255        ;// Result Storage
           ;// Write p2, p1, p0, q0, q1, q2. pTmp is set to the p3 row.
256        VST1        dP_2n, [pSrcDst], srcdstStep
257        VST1        dP_1n, [pSrcDst], srcdstStep
258        VST1        dP_0n, [pSrcDst], srcdstStep
259        SUB         pTmp, pSrcDst, srcdstStep, LSL #2
260        VST1        dQ_0n, [pSrcDst], srcdstStep
261        ADDS        XY,XY,XY
262        VST1        dQ_1n, [pSrcDst], srcdstStep
           ;// Skip the two threshold bytes belonging to this group.
263        ADD         pThresholds, pThresholds, #2
264        VST1        dQ_2n, [pSrcDst]
265
266        ADD         pSrcDst, pTmp, #8
267        BCC         LoopX
268
269ExitLoopY
270
           ;// Move back 16 pixels and down four rows to the next edge, and
           ;// switch to alpha[1]/beta[1]. None of these instructions modify
           ;// the flags, so the BNE still tests the Z flag of the last ADDS
           ;// on XY. The reload also executes after the final pass, where the
           ;// loaded values are not used.
271        SUB         pSrcDst, pSrcDst, #16
272        VLD1        {dAlpha[]}, [pAlpha_1]
273        ADD         pSrcDst, pSrcDst, srcdstStep, LSL #2
274        VLD1        {dBeta[]}, [pBeta_1]
275        BNE         LoopY
276
277        MOV         r0, #OMX_Sts_NoErr
278
279        M_END
280
281    ENDIF
282
283
284
285
286        END
287
288
289