omxVCM4P10_FilterDeblockingChroma_HorEdge_I_s.s revision 78e52bfac041d71ce53b5b13c2abf78af742b09d
1;//
2;// Copyright (C) 2007-2008 ARM Limited
3;//
4;// Licensed under the Apache License, Version 2.0 (the "License");
5;// you may not use this file except in compliance with the License.
6;// You may obtain a copy of the License at
7;//
8;//      http://www.apache.org/licenses/LICENSE-2.0
9;//
10;// Unless required by applicable law or agreed to in writing, software
11;// distributed under the License is distributed on an "AS IS" BASIS,
12;// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13;// See the License for the specific language governing permissions and
14;// limitations under the License.
15;//
16;//
17;//
18;// File Name:  omxVCM4P10_FilterDeblockingChroma_HorEdge_I_s.s
19;// OpenMAX DL: v1.0.2
20;// Revision:   9641
21;// Date:       Thursday, February 7, 2008
22;//
23;//
24;//
25;//
26
27
28        INCLUDE omxtypes_s.h
29        INCLUDE armCOMM_s.h
30
31        M_VARIANTS ARM1136JS
32
33
34        IF ARM1136JS
35
;// Packed-byte constants and the loop counter used by the filter.
;//   MASK_0     - zero in all four byte lanes; supplies the cleared lanes
;//                when SEL is used to knock lanes out of a mask.
;//   MASK_1     - 0x01 in every byte lane. Used as the initial per-lane
;//                filter flag, as the multiplier that copies one byte
;//                into all four lanes, and (shifted left by 7) as
;//                0x80808080.
;//   LOOP_COUNT - shift-register style loop counter. The code doubles it
;//                with ADDS once per pass: starting from 0x50000000 the
;//                carry comes out on the 2nd and 4th doubling and the
;//                value becomes zero on the 4th.
36MASK_0      EQU 0x00000000
37MASK_1      EQU 0x01010101
38LOOP_COUNT  EQU 0x50000000
39
40;// Declare input registers
;// r0-r3 are read on entry without being initialised by this file,
;// i.e. they carry the first four arguments.
41
42pSrcDst     RN 0
43srcdstStep  RN 1
44pAlphaArg   RN 2
45pBetaArg    RN 3
46
;// Working pointers. pQ0 shares r0 with pSrcDst. pThresholds shares r6
;// with alpha/P_0/t4 and pBS shares r9 with beta1/q_1/neg, so both
;// pointers have to be parked on the stack while those aliases are live.
47pThresholds RN 6
48pBS         RN 9
49pQ0         RN 0
50bS          RN 10
51
;// alpha0/beta0 (r6/r7) and alpha1/beta1 (r8/r9) are even/odd neighbours,
;// which is the form in which they are passed to the M_STRD/M_LDRD macros.
;// alpha and beta name the pair that is currently in use.
52alpha       RN 6
53alpha0      RN 6
54alpha1      RN 8
55
56beta        RN 7
57beta0       RN 7
58beta1       RN 9
59
60;// Declare Local/Temporary variables
61
62;// Pixels
;// One 32-bit word per row, i.e. four neighbouring pixels per register.
;// p_0 reuses r3 (pBetaArg), q_0 reuses r8 (alpha1/XY), q_1 reuses r9 (pBS).
63p_0         RN 3
64p_1         RN 5
65q_0         RN 8
66q_1         RN 9
67
68;// Filtering
69
;// The three difference temporaries all live in r12; each one is consumed
;// before the next is produced.
70dp0q0       RN 12
71dp1p0       RN 12
72dq1q0       RN 12
73
;// filt reuses r2 (pAlphaArg) and holds the per-lane filter flags.
74ap0q0       RN 4
75filt        RN 2
76
;// m00 lives in r14 (the link register) and m01 in r11.
77m00         RN 14
78m01         RN 11
79
;// pQ0 is declared a second time with the same register; Step is a second
;// name for r1 (srcdstStep).
80pQ0         RN 0
81Step        RN 1
82
83;// Output
84
;// The filtered rows are built in r6/r7, which overwrites alpha/beta (and
;// pThresholds); those values are reloaded from the stack afterwards.
85P_0         RN 6
86Q_0         RN 7
87
88;//Declarations for bSLT4 kernel
89
;// tC0 and pos reuse r5 (p_1), tC and tC1 reuse r12, neg reuses r9 (q_1).
90tC          RN 12
91tC0         RN 5
92tC1         RN 12
93pos         RN 5
94neg         RN 9
95
96;//Declarations for bSGE4 kernel
97
98
99;// Miscellaneous
;// XY is the loop counter (r8, shared with alpha1 and q_0).
100XY          RN 8
101
;// Scratch names. a and t1 share r10 with bS, t2 shares r12 with the
;// difference temporaries and tC, t3 shares r14 with m00, t4 shares r6
;// with alpha/pThresholds/P_0 and t5 shares r5 with p_1/tC0/pos.
102a           RN 10
103t1          RN 10
104t2          RN 12
105t3          RN 14
106t4          RN 6
107t5          RN 5
108
109
;//----------------------------------------------------------------------
;// omxVCM4P10_FilterDeblockingChroma_HorEdge_I
;//
;// In-place deblocking of pixels across horizontal edges. Each loop pass
;// handles four pixel columns (one 32-bit word per row) with the ARMv6
;// packed-byte instructions (USUB8, SEL, UHADD8, ...).
;//
;// Inputs as consumed by the code below:
;//   r0 pSrcDst         row that is loaded as q0 for the first edge. The
;//                      rows at -2*step, -1*step and +1*step are read as
;//                      p1, p0 and q1. Only the p0 and q0 rows are written.
;//   r1 srcdstStep      byte offset from one row to the next.
;//   r2 pAlphaArg       two bytes: [0] is used during the first outer
;//                      pass, [1] during the second.
;//   r3 pBetaArg        two bytes, used like pAlphaArg.
;//   stack ppThresholdsArg  pointer to the tC0 bytes; two bytes are
;//                      consumed (or skipped) on every pass.
;//   stack ppBSArg      pointer to the bS bytes; one halfword (two bS
;//                      bytes) is read on every pass.
;//
;// Loop structure: LoopX and LoopY label the same instruction. XY is a
;// shift-register counter that is doubled with ADDS on every pass. With
;// the LOOP_COUNT start value the carry ends the inner loop after two
;// passes and a zero result ends the outer loop after two outer passes.
;// Between outer passes pQ0 moves down four rows.
;//
;// Returns OMX_Sts_NoErr in r0.
;//
;// NOTE(review): the M_* macros come from armCOMM_s.h, which is not part
;// of this file. Comments about them describe how they are used here,
;// not how they expand.
;//----------------------------------------------------------------------
110        ;// Allocate stack memory
           ;// pXYBS is declared with 4 bytes but is accessed as a register
           ;// pair (XY, pBS), and ppBS is declared directly after it.
           ;// NOTE(review): presumably the second word of the pXYBS pair is
           ;// the ppBS slot, so that the M_STR of pBS to ppBS updates the
           ;// pBS value that the later M_LDRD of XY, pBS reloads - confirm
           ;// against the M_ALLOC8 / M_ALLOC4 definitions.
111        M_ALLOC4 ppThresholds,4
112        M_ALLOC8 pAlphaBeta0,8
113        M_ALLOC8 pAlphaBeta1,8
114        M_ALLOC8 pXYBS,4
115        M_ALLOC4 ppBS,4
116
117        ;// Function header
           ;// NOTE(review): r11 presumably tells M_START the highest register
           ;// to preserve; r14 is also used as scratch (m00/t3) below, so the
           ;// macro pair M_START/M_END is presumably saving the return address.
118        M_START omxVCM4P10_FilterDeblockingChroma_HorEdge_I, r11
119
120        ;//Input arguments on the stack
121        M_ARG   ppThresholdsArg, 4
122        M_ARG   ppBSArg, 4
123
           ;// Fetch the alpha/beta bytes for both outer passes. pThresholds
           ;// (r6) is saved to its stack slot before alpha0 is loaded into
           ;// the same register.
124        LDRB    alpha1, [pAlphaArg,#1]
125        LDRB    beta1,  [pBetaArg,#1]
126        M_LDR   pThresholds, ppThresholdsArg
127        LDR     a,=MASK_1
128        LDRB    beta0,  [pBetaArg]
129        M_STR   pThresholds, ppThresholds
130        LDRB    alpha0, [pAlphaArg]
131
           ;// Multiplying a byte value by 0x01010101 copies it into all four
           ;// byte lanes, so each threshold can be compared lane by lane.
132        MUL     alpha1, alpha1, a
133        MUL     beta1, beta1, a
134        MUL     alpha0, alpha0, a
135        MUL     beta0, beta0, a
136
           ;// beta1 (r9) is stored before pBS is loaded into the same register.
137        M_STRD  alpha1, beta1, pAlphaBeta1
138        M_LDR   pBS, ppBSArg
139        M_STRD  alpha0, beta0, pAlphaBeta0
140
           ;// Initialise the loop counter and park it together with pBS.
141        LDR     XY,=LOOP_COUNT
142        M_STRD  XY, pBS, pXYBS
143
           ;// Step back two rows: pQ0 now addresses the row loaded as p1.
144        SUB     pQ0, pQ0, srcdstStep, LSL #1
145LoopY
146LoopX
147;//---------------Load Pixels-------------------
           ;// Read two bS bytes at once and advance pBS by two.
148        LDRH    bS, [pBS], #2
149
           ;// pBS (r9) is saved here because q_1 is loaded into r9 below.
150        M_STR   pBS, ppBS
151        M_LDR   p_1, [pQ0],srcdstStep
152
           ;// The Z flag from this compare is consumed by BEQ NoFilterBS0
           ;// after the remaining loads.
153        CMP     bS, #0
154
           ;// After these loads pQ0 addresses the q1 row (three rows below
           ;// the p1 row).
155        M_LDR   p_0, [pQ0],srcdstStep
156        M_LDR   q_0, [pQ0],srcdstStep
157        M_LDR   q_1, [pQ0]
158        LDR     m01, =MASK_1                ;//  01010101 mask
159        BEQ     NoFilterBS0
160
161
162        ;// p_0 = [r3p0 r2p0 r1p0 r0p0]
163        ;// p_1 = [r3p1 r2p1 r1p1 r0p1]
164        ;// q_0 = [r3q0 r2q0 r1q0 r0q0]
165        ;// q_1 = [r3q1 r2q1 r1q1 r0q1]
166
167;//--------------Filtering Decision -------------------
168        MOV     m00, #MASK_0                ;//  00000000 mask
169
           ;// filt starts as 0x01 in every lane. If the high bS byte is zero
           ;// the two upper lanes are cleared (shift right by 16); if the low
           ;// bS byte is zero the two lower lanes are cleared (shift left by
           ;// 16). Both bytes cannot be zero here because of the BEQ above.
170        MOV     filt, m01
171        TST     bS, #0xff00
172        MOVEQ   filt, filt, LSR #16
173        TST     bS, #0xff
174        MOVEQ   filt, filt, LSL #16
           ;// Tests bit 2 of the low bS byte only. The resulting Z flag is
           ;// not consumed until BEQ bSLT4; the USUB8/SEL instructions in
           ;// between change only the GE flags. bS itself (r10) is dead after
           ;// this point because the scratch register a shares r10.
175        TST     bS, #4
176
177
178        ;// Check |p0-q0|<Alpha
           ;// Pattern used for all three checks: the two USUB8 give both
           ;// signed directions of the difference, SEL picks the non-negative
           ;// one per lane (absolute difference), the next USUB8 sets GE in
           ;// every lane where the difference is >= the threshold, and the
           ;// final SEL clears the filt flag in exactly those lanes.
179        USUB8   dp0q0, p_0, q_0
180        USUB8   a, q_0, p_0
181        SEL     ap0q0, a, dp0q0
182        USUB8   a, ap0q0, alpha
183        SEL     filt, m00, filt
184
185        ;// Check |p1-p0|<Beta
186        USUB8   dp1p0, p_1, p_0
187        USUB8   a, p_0, p_1
188        SEL     a, a, dp1p0
189        USUB8   a, a, beta
190        SEL     filt, m00, filt
191
192        ;// Check |q1-q0|<Beta
193        USUB8   dq1q0, q_1, q_0
194        USUB8   a, q_0, q_1
195        SEL     a, a, dq1q0
196        USUB8   a, a, beta
197        SEL     filt, m00, filt
198
           ;// Uses the Z flag left by TST bS, #4 above.
199        BEQ     bSLT4
200;//-------------------Filter--------------------
201bSGE4
202        ;//---------bSGE4 Execution---------------
           ;// Z from this compare is consumed by BEQ NoFilterFilt0 below.
203        CMP     filt, #0
204
           ;// Loading pThresholds overwrites alpha (r6), which is no longer
           ;// needed in this pass.
205        M_LDR   pThresholds, ppThresholds
206
207        ;// Compute P0b
           ;// Per lane: t1 = (p0 + q1) >> 1, then halving-subtract of ~p1
           ;// (which is 255 - p1) and a flip of bit 7 (add 128 modulo 256)
           ;// give ((p0 + q1) >> 1 + p1 + 1) >> 1, which is the same value as
           ;// (2*p1 + p0 + q1 + 2) >> 2.
208        UHADD8  t1, p_0, q_1
209        BEQ     NoFilterFilt0
210        MVN     t2, p_1
211        UHSUB8  t1, t1, t2
           ;// Only the GE flags of this USUB8 matter (set in lanes where the
           ;// filt flag is 1); they drive the two SEL below. t2 is overwritten
           ;// right afterwards.
212        USUB8   t2, filt, m01
213        EOR     t1, t1, m01, LSL #7
214
           ;// This path reads no threshold bytes; it only steps past the two
           ;// entries belonging to this pass.
215        ADD     pThresholds,pThresholds, #2
216
217        ;// Compute Q0b
           ;// Same construction with the roles swapped:
           ;// ((q0 + p1) >> 1 + q1 + 1) >> 1 per lane.
218        UHADD8  t2, q_0, p_1
219        MVN     t3, q_1
220        UHSUB8  t2, t2, t3
           ;// pThresholds must be stored before P_0 is written to r6.
221        M_STR   pThresholds, ppThresholds
222        SEL     P_0, t1, p_0
223        EOR     t2, t2, m01, LSL #7
224        SEL     Q_0, t2, q_0
225
           ;// Move pQ0 from the q1 row back up to the p0 row for the store.
226        SUB     pQ0, pQ0, srcdstStep, LSL #1
227        B       StoreResultAndExit
228
229;//---------- Exit of LoopX --------------
230;//---- for the case of no filtering -----
231
232NoFilterFilt0
233NoFilterBS0
           ;// Nothing is stored on this path. pQ0 is moved from the q1 row
           ;// back to the p1 row and four bytes to the right, and the two
           ;// threshold bytes of this pass are skipped.
234        M_LDR   pThresholds, ppThresholds
235        SUB     pQ0, pQ0, srcdstStep, LSL #1
236        SUB     pQ0, pQ0, srcdstStep
237        ADD     pQ0, pQ0, #4
238        ADD     pThresholds, pThresholds, #2
239
240        ;// Load counter for LoopX
241        M_LDRD  XY, pBS, pXYBS
           ;// pThresholds is stored before alpha/beta are reloaded into r6/r7.
242        M_STR   pThresholds, ppThresholds
243        M_LDRD  alpha, beta, pAlphaBeta0
244
245        ;// Advance the shift-register loop counter
           ;// Carry clear: stay in the inner loop (LoopY and LoopX are the
           ;// same address). Carry set: inner loop finished. The Z flag of
           ;// this ADDS is still valid at BNE LoopY in ExitLoopY.
246        ADDS    XY, XY, XY
247        M_STR   XY, pXYBS
248        BCC     LoopY
249        B       ExitLoopY
250
251bSLT4
252        ;//---------bSLT4 Execution---------------
253        M_LDR   pThresholds, ppThresholds
           ;// Z from this compare is consumed by BEQ NoFilterFilt0 below; the
           ;// two USUB8 in between leave it untouched.
254        CMP     filt, #0
255
256        ;// Since beta <= 18 and alpha <= 255 we know
257        ;// -254 <= p0-q0 <= 254
258        ;//  -17 <= q1-q0 <= 17
259        ;//  -17 <= p1-p0 <= 17
260
261        ;// delta = Clip3( -tC, tC, ((((q0-p0)<<2) + (p1-q1) + 4)>>3))
262        ;//
263        ;//    Calculate A = (((q0-p0)<<2) + (p1-q1) + 4)>>3
264        ;//                = (4*q0 - 4*p0 + p1 - q1 + 4)>>3
265        ;//                = ((p1-p0) - (q1-q0) - 3*(p0-q0) + 4)>>3
266
267        USUB8   t1, p_1, p_0
268        USUB8   t2, q_1, q_0
269        BEQ     NoFilterFilt0
270
           ;// The loads of the two tC0 bytes are interleaved with the
           ;// arithmetic. tC0 lands in r5 (p_1 is dead by now) and tC1 in r12
           ;// (t2 has been consumed by the SSUB8 before it).
           ;// NOTE(review): the interleaved SSUB8/SHSUB8/SADD8 steps appear to
           ;// build the value A from the formula above in byte lanes; the
           ;// step-by-step derivation is not written down in this file.
270        LDRB    tC0, [pThresholds],#1
271        SSUB8   t1, t1, t2
272        LDRB    tC1, [pThresholds],#1
           ;// pThresholds is stored before t4 is written to r6.
273        M_STR   pThresholds, ppThresholds
274        UHSUB8  t4, p_0, q_0
           ;// tC = tC0 in lane 0 and tC1 in lane 2 ...
275        ORR     tC, tC0, tC1, LSL #16
276        USUB8   t5, p_0, q_0
           ;// Keep only bit 0 of (p0 - q0) in every lane.
277        AND     t5, t5, m01
278        SHSUB8  t1, t1, t5
           ;// ... and now duplicated into lanes 1 and 3: [tC1 tC1 tC0 tC0].
279        ORR     tC, tC, LSL #8
280        SSUB8   t1, t1, t5
281        SHSUB8  t1, t1, t4
           ;// Saturating add of 1 to every tC lane.
282        UQADD8  tC, tC, m01
283        SADD8   t1, t1, m01
           ;// GE flags = lanes whose filt flag is 1; used by the SEL on tC.
284        USUB8   t5, filt, m01
285        SHSUB8  t1, t1, t4
           ;// Lanes that must not be filtered get a clip limit of zero.
286        SEL     tC, tC, m00
287
288        ;// Split into positive and negative part and clip
289
           ;// Subtracting zero only sets GE in the lanes where t1 >= 0.
           ;// pos keeps t1 in those lanes (0 elsewhere); neg = pos - t1 is 0
           ;// in those lanes and -t1 in the others. Each part is then limited
           ;// to tC with a USUB8/SEL pair and applied with saturating
           ;// adds/subtracts, which also keeps the results within 0..255.
290        SSUB8   t1, t1, m00
291        SEL     pos, t1, m00
292        USUB8   neg, pos, t1
293        USUB8   t3, pos, tC
294        SEL     pos, tC, pos
295        USUB8   t3, neg, tC
296        SEL     neg, tC, neg
297        UQADD8  P_0, p_0, pos
298        UQSUB8  Q_0, q_0, pos
299        UQSUB8  P_0, P_0, neg
300        UQADD8  Q_0, Q_0, neg
301
           ;// Move pQ0 from the q1 row back up to the p0 row for the store.
302        SUB     pQ0, pQ0, srcdstStep, LSL #1
303
304        ;// Choose to store the filtered
305        ;// value or the original pixel
306        USUB8   t1, filt, m01
307        SEL     P_0, P_0, p_0
308        SEL     Q_0, Q_0, q_0
309
310StoreResultAndExit
311
312        ;//---------Store result---------------
313
314        ;// P_0 = [r0p0 r1p0 r2p0 r3p0]
315        ;// Q_0 = [r0q0 r1q0 r2q0 r3q0]
316
           ;// Write the p0 row, step down one row, write the q0 row and move
           ;// four bytes to the right.
317        M_STR   P_0, [pQ0], srcdstStep
318        STR     Q_0, [pQ0], #4
319
           ;// Reload the counter/pBS pair and alpha/beta; r6-r9 were used as
           ;// pixel and result registers during this pass.
320        M_LDRD  XY, pBS, pXYBS
321        M_LDRD  alpha, beta, pAlphaBeta0
322
           ;// Back up two rows: pQ0 addresses the p1 row of the next group.
323        SUB     pQ0, pQ0, srcdstStep, LSL #1
324
           ;// Same counter step as on the no-filter path: carry clear keeps
           ;// the inner loop running, and the Z flag is consumed by BNE LoopY.
325        ADDS    XY, XY, XY
326        M_STR   XY, pXYBS
327        BCC     LoopX
328
329;//-------- Common Exit of LoopY -----------------
330        ;// Align the pointers
331
332ExitLoopY
           ;// Prepare the second outer pass: skip four bS bytes, undo the
           ;// 8-byte horizontal advance of the two inner passes, move down
           ;// four rows, and make the second alpha/beta pair the one that the
           ;// inner loop reloads from pAlphaBeta0. None of these instructions
           ;// changes the flags.
333        ADD     pBS, pBS, #4
334        M_LDRD  alpha, beta, pAlphaBeta1
335        SUB     pQ0, pQ0, #8
336        ADD     pQ0, pQ0, srcdstStep, LSL #2
337        M_STRD  alpha, beta, pAlphaBeta0
338
           ;// Z comes from the last ADDS XY, XY, XY: the counter reaches zero
           ;// only after the second outer pass.
339        BNE     LoopY
340        MOV     r0, #OMX_Sts_NoErr
341
342;//-----------------End Filter--------------------
343        M_END
345
346    ENDIF
347
348        END
349
350
351