1;//
2;//
3;// File Name:  omxVCM4P10_FilterDeblockingChroma_HorEdge_I_s.s
4;// OpenMAX DL: v1.0.2
5;// Revision:   12290
6;// Date:       Wednesday, April 9, 2008
7;//
8;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
9;//
10;//
11;//
12
13
14        INCLUDE omxtypes_s.h
15        INCLUDE armCOMM_s.h
16
17        M_VARIANTS CortexA8
18
19        IF CortexA8
20
21        IMPORT  armVCM4P10_DeblockingChromabSGE4_unsafe
22        IMPORT  armVCM4P10_DeblockingChromabSLT4_unsafe
23
;// Constants
;// LOOP_COUNT seeds XY. The loop doubles XY with ADDS on every pass, so the
;// value is 0x80000000 after the first pass and wraps to zero after the
;// second one, which ends the loop: exactly two passes.
24LOOP_COUNT  EQU 0x40000000
;// TST of the bS word against MASK_3 is non-zero when any of its four bytes
;// has bit 0 or bit 1 set.
25MASK_3      EQU 0x03030303
;// TST of the bS word against MASK_4 is non-zero when any of its four bytes
;// has bit 2 set.
26MASK_4      EQU 0x04040404
27
28;// Function arguments passed in r0-r3
29
30pSrcDst     RN 0
31srcdstStep  RN 1
32pAlpha      RN 2
33pBeta       RN 3
34
;// Loaded from the stack arguments ppThresholds / ppBS with M_LDR.
;// bS3210 receives one 32-bit word read through pBS on every pass.
35pThresholds RN 5
36pBS         RN 4
37bS3210      RN 6
38
39;// Loop counter (see LOOP_COUNT)
40
41XY          RN 7
42
43;// Pixels: one 8-byte row per D register. The dP_* rows are loaded before
;// the dQ_* rows, in the order P_2, P_1, P_0, Q_0, Q_1, Q_2.
44dP_0        DN D4.U8
45dP_1        DN D5.U8
46dP_2        DN D6.U8
47dQ_0        DN D8.U8
48dQ_1        DN D9.U8
49dQ_2        DN D10.U8
50
51;// Filtering Decision
;// dAlpha / dBeta hold one byte replicated to every lane.
52dAlpha      DN D0.U8
53dBeta       DN D2.U8
54
;// Several aliases below deliberately share a physical register; the
;// instruction order in the function depends on this:
;//   D12 : dAqflg and dAp1p0
;//   D13 : dAp0q0 and dP_0t
;//   D17 : dApflg and dAq2q0
;//   D19 : dAp2p0 and dTemp
55dFilt       DN D16.U8
56dAqflg      DN D12.U8
57dApflg      DN D17.U8
58
;// Absolute differences between rows (written by VABD in the function).
59dAp0q0      DN D13.U8
60dAp1p0      DN D12.U8
61dAq1q0      DN D18.U8
62dAp2p0      DN D19.U8
63dAq2q0      DN D17.U8
64
;// Q13 is the register pair D26:D27. dBS3210 and dFilt_bs are both D26
;// (low half of Q13); dMask_bs is D27 (high half of Q13).
65qBS3210     QN Q13.U16
66dBS3210     DN D26
67dMask_bs    DN D27
68dFilt_bs    DN D26.U16
69
70;// bSLT4
;// dMask_0 and dMask_1 are initialised in the function but not read again in
;// this file; presumably they are inputs of the imported bS<4 helper - confirm.
71dMask_0     DN D14.U8
72dMask_1     DN D15.U8
73dMask_4     DN D1.U16
74
;// Core registers holding the MASK_4 / MASK_3 literals for TST.
75Mask_4      RN 8
76Mask_3      RN 9
77
;// Not referenced in this file (shares D19 with dAp2p0).
78dTemp       DN D19.U8
79
80;// Result
;// None of these four registers is written in this file; the function only
;// merges them with VBIT / VBIF and stores dP_0n / dQ_0n.
;// NOTE(review): presumably produced by the imported helpers - confirm.
81dP_0t       DN D13.U8
82dQ_0t       DN D31.U8
83
84dP_0n       DN D29.U8
85dQ_0n       DN D24.U8
86
87
88        ;// Function header
;//----------------------------------------------------------------------
;// omxVCM4P10_FilterDeblockingChroma_HorEdge_I
;//
;// Runs the LoopY body twice. Each pass reads six 8-byte rows around one
;// horizontal edge, derives per-pixel filter masks from alpha, beta and the
;// four bS bytes of that edge, calls the imported bS<4 / bS==4 helpers when
;// the bS word requires them, and writes back the two rows next to the edge
;// (P_0 and Q_0). The second pass works on the edge four rows further down.
;//
;// In:   r0 = pSrcDst, r1 = srcdstStep, r2 = pAlpha, r3 = pBeta
;//       stack: ppThresholds, ppBS (declared with M_ARG below)
;// Out:  r0 = OMX_Sts_NoErr on both exit paths
;//
;// NOTE(review): M_START / M_ARG / M_LDR / M_EXIT / M_END are macros from
;// armCOMM_s.h, which is not visible here. "r9, d15" presumably names the
;// highest core / NEON registers the prologue must preserve - confirm.
;// NOTE(review): the two helpers are reached with BLNE and exchange data
;// through the register aliases of this file rather than through arguments;
;// their exact register contract is not visible here.
;//----------------------------------------------------------------------
89        M_START omxVCM4P10_FilterDeblockingChroma_HorEdge_I, r9, d15
90
91        ;// Arguments on the stack (4 bytes each)
92        M_ARG   ppThresholds, 4
93        M_ARG   ppBS, 4
94
95        ;// d0 = dAlpha: first alpha byte replicated to all lanes
96        ;// d2 = dBeta:  first beta byte replicated to all lanes
97
98        ;// The post-increment on pAlpha / pBeta leaves both pointers on the
          ;// second alpha / beta byte, which is what later loads pick up.
99        VLD1        {dAlpha[]}, [pAlpha]!
100        SUB         pSrcDst, pSrcDst, srcdstStep, LSL #1 ;// pSrcDst -= 2 * step
101        SUB         pSrcDst, pSrcDst, srcdstStep  ;// total -3 rows: first row loaded is P_2
102        VLD1        {dBeta[]}, [pBeta]!
103
104        M_LDR       pBS, ppBS
105        M_LDR       pThresholds, ppThresholds
106
107        LDR         Mask_3, =MASK_3
108        LDR         Mask_4, =MASK_4
109
          ;// dMask_4 is used by the VTST below. dMask_0 / dMask_1 are not read
          ;// again in this file; presumably the bS<4 helper needs them - confirm.
110        VMOV        dMask_0, #0
111        VMOV        dMask_1, #1
112        VMOV        dMask_4, #4
113
114        LDR         XY, =LOOP_COUNT  ;// doubled once per pass; zero after two passes
115
116        ;// p0-p3 - d4-d7
117        ;// q0-q3 - d8-d11
          ;// (d7 and d11 are not touched in this file)
118LoopY
          ;// Four bS bytes for this edge. pBS moves on by 8 bytes per pass, so
          ;// the second pass reads the word at byte offset 8.
119        LDR         bS3210, [pBS], #8
120
          ;// Six row loads, each post-incrementing pSrcDst by one row. After the
          ;// last one pSrcDst is three rows below the Q_0 row.
121        VLD1        dP_2, [pSrcDst], srcdstStep
122        ;1
123        VLD1        dP_1, [pSrcDst], srcdstStep
124        CMP         bS3210, #0  ;// flags are consumed by the BEQ below; the NEON ops between leave them alone
125        VLD1        dP_0, [pSrcDst], srcdstStep
126        ;1
127        VLD1        dQ_0, [pSrcDst], srcdstStep
128        VABD        dAp2p0, dP_2, dP_0  ;// |P_2 - P_0|
129        VLD1        dQ_1, [pSrcDst], srcdstStep
130        VABD        dAp0q0, dP_0, dQ_0  ;// |P_0 - Q_0|
131        VLD1        dQ_2, [pSrcDst], srcdstStep
132        BEQ         NoFilterBS0  ;// all four bS bytes are zero: nothing to filter on this edge
133
134        VABD        dAp1p0, dP_1, dP_0  ;// |P_1 - P_0|
135        VABD        dAq1q0, dQ_1, dQ_0  ;// |Q_1 - Q_0|
136
137        VCGT        dFilt, dAlpha, dAp0q0  ;// dFilt = alpha > |P_0 - Q_0|
138        VMOV.U32    dBS3210[0], bS3210  ;// bS word into the low 32 bits of D26
139        VMAX        dAp1p0, dAq1q0, dAp1p0  ;// max(|Q_1 - Q_0|, |P_1 - P_0|)
140        VMOVL       qBS3210, dBS3210.U8  ;// widen bytes to 16 bits: D26 now holds the four bS values, one per 16-bit lane
141        VABD        dAq2q0, dQ_2, dQ_0  ;// |Q_2 - Q_0|
142        VCGT        dMask_bs.S16, dBS3210.S16, #0  ;// 16-bit lane all ones where bS > 0 (one lane covers two pixels)
143
144        VCGT        dAp1p0, dBeta, dAp1p0  ;// beta > max(|Q_1 - Q_0|, |P_1 - P_0|)
145        VCGT        dAp2p0, dBeta, dAp2p0  ;// beta > |P_2 - P_0|
146
147        VAND        dFilt, dMask_bs.U8  ;// dFilt &= (bS > 0)
148
149        TST         bS3210, Mask_3  ;// flags for the first BLNE below
150
151        VCGT        dAq2q0, dBeta, dAq2q0  ;// beta > |Q_2 - Q_0|
152        VAND        dFilt, dFilt, dAp1p0  ;// dFilt now combines the alpha test, the bS test and the beta test
153
          ;// dAqflg reuses D12 (dAp1p0) and dApflg reuses D17 (dAq2q0); each
          ;// source is read for the last time before its register is overwritten.
          ;// Neither flag is read again in this file.
154        VAND        dAqflg, dFilt, dAq2q0
155        VAND        dApflg, dFilt, dAp2p0
156
157        ;// bS < 4 Filtering: called when any bS byte has bit 0 or bit 1 set
158        BLNE        armVCM4P10_DeblockingChromabSLT4_unsafe
159
160        TST         bS3210, Mask_4  ;// flags for the second BLNE below
161
162        SUB         pSrcDst, pSrcDst, srcdstStep, LSL #2  ;// back up four rows: pSrcDst addresses the P_0 row
163        VTST        dFilt_bs, dFilt_bs, dMask_4  ;// 16-bit lane all ones where the widened bS has bit 2 set
164
165        ;// bS == 4 Filtering: called when any bS byte has bit 2 set
166        BLNE        armVCM4P10_DeblockingChromabSGE4_unsafe
167
          ;// Where dFilt_bs is set, take the bits of dP_0t / dQ_0t; elsewhere
          ;// keep what is already in dP_0n / dQ_0n.
168        VBIT        dP_0n, dP_0t, dFilt_bs
169        VBIT        dQ_0n, dQ_0t, dFilt_bs
170
          ;// Where dFilt is clear, restore the unfiltered P_0 / Q_0 pixels.
171        VBIF        dP_0n, dP_0, dFilt
172        VBIF        dQ_0n, dQ_0, dFilt
173
174        ;// Result Storage: rows P_0 and Q_0. Afterwards pSrcDst is one row
          ;// below Q_0, which is the P_2 row of the edge four rows further down.
175        VST1        dP_0n, [pSrcDst], srcdstStep
176        ADDS        XY, XY, XY  ;// Z is set once XY wraps to zero (end of second pass)
177        VST1        dQ_0n, [pSrcDst], srcdstStep
178
          ;// NOTE(review): on this path the file neither reloads dAlpha / dBeta
          ;// nor advances pThresholds before the second pass; presumably the
          ;// imported helpers do so - confirm against their source.
179        BNE         LoopY
180
181        MOV         r0, #OMX_Sts_NoErr
182
183        M_EXIT
184
          ;// Reached when the bS word of the current edge is zero. Nothing is
          ;// stored; only the per-pass state is moved on.
185NoFilterBS0
186
187        VLD1        {dAlpha[]}, [pAlpha]  ;// alpha byte at the already advanced pAlpha, no further increment
188        SUB         pSrcDst, pSrcDst, srcdstStep, LSL #1  ;// back up two rows: same position the filtering path ends on
189        ADDS        XY, XY, XY  ;// flags for the BNE below; the VLD1 and ADD between leave them alone
190        VLD1        {dBeta[]}, [pBeta]  ;// beta byte at the already advanced pBeta, no further increment
191        ADD         pThresholds, pThresholds, #4  ;// skip four threshold bytes
192        BNE         LoopY
193
194        MOV         r0, #OMX_Sts_NoErr
195        M_END
196
197        ENDIF
198
199
200        END
201
202
203