1;//
2;//
3;// File Name:  omxVCM4P10_FilterDeblockingChroma_HorEdge_I_s.s
4;// OpenMAX DL: v1.0.2
5;// Revision:   9641
6;// Date:       Thursday, February 7, 2008
7;//
8;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
9;//
10;//
11;//
12
13
14        INCLUDE omxtypes_s.h
15        INCLUDE armCOMM_s.h
16
17        M_VARIANTS ARM1136JS
18
19
20        IF ARM1136JS
21
;// Constants used by the filter below.
;//   MASK_0     - all-zero word; used as the "0" operand of per-byte SEL.
;//   MASK_1     - 0x01 replicated in each of the four byte lanes; used to
;//                replicate a byte across a word (via MUL), as the initial
;//                per-byte filter-enable mask, and (shifted left by 7) as
;//                the 0x80 per-byte toggle.
;//   LOOP_COUNT - seed of the combined loop counter. The code doubles it
;//                with ADDS on every iteration: 0x50000000 -> 0xA0000000
;//                (no carry) -> 0x40000000 (carry) -> 0x80000000 (no carry)
;//                -> 0x00000000 (carry, zero), i.e. carry marks the end of
;//                an inner pass and a zero result marks the final pass.
22MASK_0      EQU 0x00000000
23MASK_1      EQU 0x01010101
24LOOP_COUNT  EQU 0x50000000
25
;// Register name aliases (RN).
;// NOTE: many names below map to the same physical register (for example
;// r6 is pThresholds, alpha, alpha0, P_0 and t4; r12 is dp0q0, dp1p0,
;// dq1q0, tC, tC1 and t2). Only one meaning of a register is live at a
;// time, so the order of instructions that use these names matters.
26;// Declare input registers
27
;// r0-r3: the four register arguments as used by the function entry code.
28pSrcDst     RN 0
29srcdstStep  RN 1
30pAlphaArg   RN 2
31pBetaArg    RN 3
32
;// Working pointers and the current pair of bS bytes.
33pThresholds RN 6
34pBS         RN 9
35pQ0         RN 0
36bS          RN 10
37
;// alpha/beta: thresholds replicated into all four byte lanes.
;// The "0" set shares registers with alpha/beta; the "1" set is only
;// held in r8/r9 until it is written to the stack.
38alpha       RN 6
39alpha0      RN 6
40alpha1      RN 8
41
42beta        RN 7
43beta0       RN 7
44beta1       RN 9
45
46;// Declare Local/Temporary variables
47
48;// Pixels
;// Each register holds four horizontally adjacent pixels of one row.
49p_0         RN 3
50p_1         RN 5
51q_0         RN 8
52q_1         RN 9
53
54;// Filtering
55
;// Scratch for the per-byte absolute-difference tests (all share r12).
56dp0q0       RN 12
57dp1p0       RN 12
58dq1q0       RN 12
59
;// ap0q0: per-byte |p0-q0|; filt: per-byte filter-enable mask (0x01/0x00).
60ap0q0       RN 4
61filt        RN 2
62
;// m00/m01 hold MASK_0/MASK_1 inside the loop.
63m00         RN 14
64m01         RN 11
65
;// pQ0 is declared a second time here with the same register as above.
;// Step is not referenced by the code in this file.
66pQ0         RN 0
67Step        RN 1
68
69;// Output
70
;// Filtered p0/q0 rows; these reuse r6/r7 (alpha/beta), which are reloaded
;// from the stack before the next iteration.
71P_0         RN 6
72Q_0         RN 7
73
74;//Declarations for bSLT4 kernel
75
;// tC0/tC1 are the two threshold bytes of one iteration, tC the packed
;// per-byte clip limit; pos/neg are the positive and negative parts of
;// the clipped delta.
76tC          RN 12
77tC0         RN 5
78tC1         RN 12
79pos         RN 5
80neg         RN 9
81
82;//Declarations for bSGE4 kernel
83
84
85;// Miscellaneous
;// XY is the combined loop counter (see LOOP_COUNT); it shares r8 with
;// q_0/alpha1 and is therefore kept on the stack between uses.
86XY          RN 8
87
;// General temporaries; each shares its register with other aliases above.
88a           RN 10
89t1          RN 10
90t2          RN 12
91t3          RN 14
92t4          RN 6
93t5          RN 5
93t5          RN 5
94
95
;//------------------------------------------------------------------------
;// omxVCM4P10_FilterDeblockingChroma_HorEdge_I
;//
;// In-place filtering across two horizontal edges of a chroma block.
;// The work is done as 2 (outer, LoopY) x 2 (inner, LoopX) iterations;
;// each iteration handles 4 pixel columns packed into one 32-bit word
;// per row, using the ARMv6 per-byte SIMD instructions.
;//
;// Arguments as consumed by the code below:
;//   r0  pSrcDst      - address of the q0 row of the first edge. Rows at
;//                      -2, -1, 0 and +1 * srcdstStep are read; the rows
;//                      at -1 (p0) and 0 (q0) are written. The second
;//                      edge is 4 * srcdstStep further down.
;//   r1  srcdstStep   - byte distance between rows.
;//   r2  pAlphaArg    - two bytes: [0] alpha for the first edge,
;//                      [1] alpha for the second edge.
;//   r3  pBetaArg     - two bytes, same layout as pAlphaArg.
;//   stack ppThresholdsArg - pointer to threshold bytes; 2 bytes are
;//                      consumed per iteration, 8 in total, contiguously.
;//   stack ppBSArg    - pointer to bS bytes; bytes 0..3 are used for the
;//                      first edge and bytes 8..11 for the second (the
;//                      pointer is advanced by 4 between the two passes).
;//
;// Returns r0 = OMX_Sts_NoErr.
;//
;// NOTE(review): M_ALLOC*, M_START, M_ARG, M_LDR/M_STR, M_LDRD/M_STRD and
;// M_END are macros from armCOMM_s.h, which is not visible here. The
;// comments below assume they behave like plain LDR/STR/LDRD/STRD on the
;// named stack slots and do not modify the NZCV or GE flags - confirm.
;//------------------------------------------------------------------------
96        ;// Allocate stack memory
        ;// NOTE(review): pXYBS is allocated as 4 bytes but is accessed with
        ;// M_STRD/M_LDRD (two words: XY and pBS), while pBS alone is saved
        ;// with M_STR to ppBS. This appears to rely on ppBS being placed
        ;// immediately after pXYBS - confirm against M_ALLOC in armCOMM_s.h.
97        M_ALLOC4 ppThresholds,4
98        M_ALLOC8 pAlphaBeta0,8
99        M_ALLOC8 pAlphaBeta1,8
100        M_ALLOC8 pXYBS,4
101        M_ALLOC4 ppBS,4
102
103        ;// Function header
104        M_START omxVCM4P10_FilterDeblockingChroma_HorEdge_I, r11
105
106        ;//Input arguments on the stack
107        M_ARG   ppThresholdsArg, 4
108        M_ARG   ppBSArg, 4
109
        ;// Load both alpha/beta bytes and copy the thresholds pointer to a
        ;// local slot. pThresholds shares r6 with alpha0, so it is stored
        ;// before alpha0 is loaded.
110        LDRB    alpha1, [pAlphaArg,#1]
111        LDRB    beta1,  [pBetaArg,#1]
112        M_LDR   pThresholds, ppThresholdsArg
113        LDR     a,=MASK_1
114        LDRB    beta0,  [pBetaArg]
115        M_STR   pThresholds, ppThresholds
116        LDRB    alpha0, [pAlphaArg]
117
        ;// Multiply by 0x01010101 to replicate each byte into all 4 lanes.
118        MUL     alpha1, alpha1, a
119        MUL     beta1, beta1, a
120        MUL     alpha0, alpha0, a
121        MUL     beta0, beta0, a
122
        ;// alpha1/beta1 (r8/r9) must be saved before r9 is reused as pBS
        ;// and r8 as XY. alpha0/beta0 stay live in r6/r7 as alpha/beta.
123        M_STRD  alpha1, beta1, pAlphaBeta1
124        M_LDR   pBS, ppBSArg
125        M_STRD  alpha0, beta0, pAlphaBeta0
126
127        LDR     XY,=LOOP_COUNT
128        M_STRD  XY, pBS, pXYBS
129
        ;// Point pQ0 at the p1 row (two rows above pSrcDst).
130        SUB     pQ0, pQ0, srcdstStep, LSL #1
        ;// LoopY and LoopX label the same address; the outer/inner
        ;// distinction is made only by the counter handling at the bottom.
131LoopY
132LoopX
133;//---------------Load Pixels-------------------
        ;// Load two bS bytes (one halfword) and save the advanced pBS,
        ;// because r9 is about to be reused for q_1.
134        LDRH    bS, [pBS], #2
135
136        M_STR   pBS, ppBS
137        M_LDR   p_1, [pQ0],srcdstStep
138
        ;// Z is set here if both bS bytes are zero; it is consumed by the
        ;// BEQ below (the loads in between do not change the flags).
139        CMP     bS, #0
140
141        M_LDR   p_0, [pQ0],srcdstStep
142        M_LDR   q_0, [pQ0],srcdstStep
143        M_LDR   q_1, [pQ0]
144        LDR     m01, =MASK_1                ;//  01010101 mask
145        BEQ     NoFilterBS0
146
147
148        ;// p_0 = [r3p0 r2p0 r1p0 r0p0]
149        ;// p_1 = [r3p1 r2p1 r1p1 r0p1]
150        ;// q_0 = [r3q0 r2q0 r1q0 r0q0]
151        ;// q_1 = [r3q1 r2q1 r1q1 r0q1]
152
153;//--------------Filtering Decision -------------------
154        MOV     m00, #MASK_0                ;//  00000000 mask
155
        ;// Start with all four lanes enabled, then clear the two lanes
        ;// belonging to a bS byte that is zero: a zero high byte clears the
        ;// upper half of filt, a zero low byte clears the lower half.
        ;// (Both bytes zero was already handled by BEQ NoFilterBS0.)
156        MOV     filt, m01
157        TST     bS, #0xff00
158        MOVEQ   filt, filt, LSR #16
159        TST     bS, #0xff
160        MOVEQ   filt, filt, LSL #16
        ;// Z from this TST (bit 2 of the low bS byte) selects the kernel at
        ;// "BEQ bSLT4" below. USUB8/SEL only touch the GE flags, so Z
        ;// survives the threshold tests. bS (r10) is overwritten by "a"
        ;// right after this point.
161        TST     bS, #4
162
163
        ;// Pattern used three times below: two USUB8 in opposite directions
        ;// plus SEL give the per-byte absolute difference; a further USUB8
        ;// against the threshold sets GE where |diff| >= threshold, and SEL
        ;// then clears the corresponding lane of filt.
164        ;// Check |p0-q0|<Alpha
165        USUB8   dp0q0, p_0, q_0
166        USUB8   a, q_0, p_0
167        SEL     ap0q0, a, dp0q0
168        USUB8   a, ap0q0, alpha
169        SEL     filt, m00, filt
170
171        ;// Check |p1-p0|<Beta
172        USUB8   dp1p0, p_1, p_0
173        USUB8   a, p_0, p_1
174        SEL     a, a, dp1p0
175        USUB8   a, a, beta
176        SEL     filt, m00, filt
177
178        ;// Check |q1-q0|<Beta
179        USUB8   dq1q0, q_1, q_0
180        USUB8   a, q_0, q_1
181        SEL     a, a, dq1q0
182        USUB8   a, a, beta
183        SEL     filt, m00, filt
184
185        BEQ     bSLT4
186;//-------------------Filter--------------------
187bSGE4
188        ;//---------bSGE4 Execution---------------
        ;// Z is set if no lane is enabled; consumed by BEQ NoFilterFilt0.
189        CMP     filt, #0
190
191        M_LDR   pThresholds, ppThresholds
192
193        ;// Compute P0b
        ;// Per byte: t1 = (p0 + q1) >> 1, then UHSUB8 against ~p1 followed
        ;// by toggling bit 7 yields (2*p1 + p0 + q1 + 2) >> 2.
194        UHADD8  t1, p_0, q_1
195        BEQ     NoFilterFilt0
196        MVN     t2, p_1
197        UHSUB8  t1, t1, t2
        ;// Set GE per lane where filt >= 1; only the GE flags are needed,
        ;// t2 is overwritten below. The GE flags are used by both SELs.
198        USUB8   t2, filt, m01
199        EOR     t1, t1, m01, LSL #7
200
        ;// The two threshold bytes of this iteration are skipped unread.
201        ADD     pThresholds,pThresholds, #2
202
203        ;// Compute Q0b
        ;// Same construction with p/q swapped: (2*q1 + q0 + p1 + 2) >> 2.
204        UHADD8  t2, q_0, p_1
205        MVN     t3, q_1
206        UHSUB8  t2, t2, t3
        ;// pThresholds must be stored before P_0 (same register, r6) is
        ;// written by the next instruction.
207        M_STR   pThresholds, ppThresholds
208        SEL     P_0, t1, p_0
209        EOR     t2, t2, m01, LSL #7
210        SEL     Q_0, t2, q_0
211
        ;// pQ0 is at the q1 row; step back two rows to the p0 row.
212        SUB     pQ0, pQ0, srcdstStep, LSL #1
213        B       StoreResultAndExit
214
215;//---------- Exit of LoopX --------------
216;//---- for the case of no filtering -----
217
218NoFilterFilt0
219NoFilterBS0
        ;// Nothing is written. Move pQ0 from the q1 row back to the p1 row
        ;// and 4 bytes to the right, and skip this iteration's 2 thresholds.
220        M_LDR   pThresholds, ppThresholds
221        SUB     pQ0, pQ0, srcdstStep, LSL #1
222        SUB     pQ0, pQ0, srcdstStep
223        ADD     pQ0, pQ0, #4
224        ADD     pThresholds, pThresholds, #2
225
226        ;// Load counter for LoopX
        ;// Reload XY and pBS, store pThresholds before r6 is reloaded as
        ;// alpha, then restore alpha/beta for the next iteration.
227        M_LDRD  XY, pBS, pXYBS
228        M_STR   pThresholds, ppThresholds
229        M_LDRD  alpha, beta, pAlphaBeta0
230
231        ;// Advance the loop counter: no carry -> another inner iteration
232        ADDS    XY, XY, XY
233        M_STR   XY, pXYBS
234        BCC     LoopY
235        B       ExitLoopY
236
237bSLT4
238        ;//---------bSLT4 Execution---------------
239        M_LDR   pThresholds, ppThresholds
        ;// Z is set if no lane is enabled; consumed by BEQ NoFilterFilt0
        ;// after the two USUB8 below (which leave NZCV unchanged).
240        CMP     filt, #0
241
242        ;// Since beta <= 18 and alpha <= 255 we know
243        ;// -254 <= p0-q0 <= 254
244        ;//  -17 <= q1-q0 <= 17
245        ;//  -17 <= p1-p0 <= 17
246
247        ;// delta = Clip3( -tC, tC, ((((q0-p0)<<2) + (p1-q1) + 4)>>3))
248        ;//
249        ;//    Calculate A = (((q0-p0)<<2) + (p1-q1) + 4)>>3
250        ;//                = (4*q0 - 4*p0 + p1 - q1 + 4)>>3
251        ;//                = ((p1-p0) - (q1-q0) - 3*(p0-q0) + 4)>>3
252
253        USUB8   t1, p_1, p_0
254        USUB8   t2, q_1, q_0
255        BEQ     NoFilterFilt0
256
        ;// The loads and arithmetic below are interleaved. Register reuse
        ;// to keep in mind: tC0 overwrites p_1 (r5), tC1 overwrites t2
        ;// (r12), t4 overwrites pThresholds (r6) and t5 overwrites tC0 (r5),
        ;// each only after the previous value has been consumed.
257        LDRB    tC0, [pThresholds],#1
        ;// t1 = (p1-p0) - (q1-q0), per byte, signed.
258        SSUB8   t1, t1, t2
259        LDRB    tC1, [pThresholds],#1
260        M_STR   pThresholds, ppThresholds
        ;// t4 = (p0-q0) >> 1 per byte (halving keeps the 9-bit difference
        ;// in 8 bits); t5 = the low bit of (p0-q0) that halving discards.
261        UHSUB8  t4, p_0, q_0
        ;// Pack the two thresholds: tC0 into byte 0, tC1 into byte 2.
262        ORR     tC, tC0, tC1, LSL #16
263        USUB8   t5, p_0, q_0
264        AND     t5, t5, m01
265        SHSUB8  t1, t1, t5
        ;// Duplicate into bytes 1 and 3 so each threshold covers two lanes.
266        ORR     tC, tC, LSL #8
267        SSUB8   t1, t1, t5
268        SHSUB8  t1, t1, t4
        ;// Clip limit = threshold + 1 per byte (saturating).
269        UQADD8  tC, tC, m01
270        SADD8   t1, t1, m01
        ;// Set GE per lane where filt >= 1 (t5's value itself is unused);
        ;// SHSUB8 does not change GE, so the SEL below zeroes tC in
        ;// disabled lanes.
271        USUB8   t5, filt, m01
272        SHSUB8  t1, t1, t4
273        SEL     tC, tC, m00
274
275        ;// Split into positive and negative part and clip
276
        ;// Subtracting zero sets GE where t1 >= 0: pos = max(t1, 0) and
        ;// neg = pos - t1 = max(-t1, 0). Each part is then limited to tC
        ;// with an unsigned compare (USUB8 sets GE) followed by SEL.
277        SSUB8   t1, t1, m00
278        SEL     pos, t1, m00
279        USUB8   neg, pos, t1
280        USUB8   t3, pos, tC
281        SEL     pos, tC, pos
282        USUB8   t3, neg, tC
283        SEL     neg, tC, neg
        ;// P_0 = p0 + pos - neg and Q_0 = q0 - pos + neg, with unsigned
        ;// saturation at each step.
284        UQADD8  P_0, p_0, pos
285        UQSUB8  Q_0, q_0, pos
286        UQSUB8  P_0, P_0, neg
287        UQADD8  Q_0, Q_0, neg
288
        ;// pQ0 is at the q1 row; step back two rows to the p0 row.
289        SUB     pQ0, pQ0, srcdstStep, LSL #1
290
291        ;// Choose to store the filtered
292        ;// value or the original pixel
        ;// GE is recomputed from filt because the clipping above changed it.
293        USUB8   t1, filt, m01
294        SEL     P_0, P_0, p_0
295        SEL     Q_0, Q_0, q_0
296
297StoreResultAndExit
298
299        ;//---------Store result---------------
300
301        ;// P_0 = [r0p0 r1p0 r2p0 r3p0]
302        ;// Q_0 = [r0q0 r1q0 r2q0 r3q0]
303
        ;// Write the p0 row, advance one row, write the q0 row and advance
        ;// 4 bytes to the next group of columns.
304        M_STR   P_0, [pQ0], srcdstStep
305        STR     Q_0, [pQ0], #4
306
        ;// P_0/Q_0 shared r6/r7 with alpha/beta, and q_0/q_1 shared r8/r9
        ;// with XY/pBS, so all four are reloaded from the stack here.
307        M_LDRD  XY, pBS, pXYBS
308        M_LDRD  alpha, beta, pAlphaBeta0
309
        ;// From the q0 row back to the p1 row for the next iteration.
310        SUB     pQ0, pQ0, srcdstStep, LSL #1
311
        ;// Advance the loop counter: no carry -> another inner iteration.
312        ADDS    XY, XY, XY
313        M_STR   XY, pXYBS
314        BCC     LoopX
315
316;//-------- Common Exit of LoopY -----------------
317        ;// Align the pointers
318
319ExitLoopY
        ;// Reached after two inner iterations. Skip 4 bS bytes, move pQ0
        ;// back 8 bytes and down 4 rows, and make the second alpha/beta
        ;// pair the current one. None of these instructions is flag-setting
        ;// (assuming the M_* macros are not), so the BNE below still tests
        ;// Z from the last ADDS on XY: non-zero -> run the second pass.
320        ADD     pBS, pBS, #4
321        M_LDRD  alpha, beta, pAlphaBeta1
322        SUB     pQ0, pQ0, #8
323        ADD     pQ0, pQ0, srcdstStep, LSL #2
324        M_STRD  alpha, beta, pAlphaBeta0
325
326        BNE     LoopY
327        MOV     r0, #OMX_Sts_NoErr
328
329;//-----------------End Filter--------------------
330        M_END
331
332    ENDIF
333
334        END
335
336
337