1;//
2;//
3;// File Name:  omxVCM4P10_FilterDeblockingLuma_HorEdge_I_s.s
4;// OpenMAX DL: v1.0.2
5;// Revision:   9641
6;// Date:       Thursday, February 7, 2008
7;//
8;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
9;//
10;//
11;//
12
13        INCLUDE omxtypes_s.h
14        INCLUDE armCOMM_s.h
15
16        M_VARIANTS ARM1136JS
17
18        IMPORT  armVCM4P10_DeblockingLumabSLT4_unsafe
19        IMPORT  armVCM4P10_DeblockingLumabSGE4_unsafe
20
21
22
23    IF ARM1136JS
24
25
;// Constants and register aliases for omxVCM4P10_FilterDeblockingLuma_HorEdge_I.
;// Many aliases below name the same physical register (for example r0 is
;// pSrcDst, pQ0, apflg, apqflg, pQ0a, pQ0b, Q1b and a). An alias is only
;// meaningful while the value it names is live in the function body.
26MASK_0      EQU 0x00000000      ;// all-zero word, loaded into m00
27MASK_1      EQU 0x01010101      ;// 0x01 in every byte lane (m01); also the multiplier that copies a byte into all four lanes
28MASK_2      EQU 0xff00ff00      ;// not referenced in the visible code
29LOOP_COUNT  EQU 0x11110000      ;// doubled once per inner-loop pass: carry out on every 4th doubling, zero after the 16th
30
31;// Declare input registers
32
33pSrcDst     RN 0
34srcdstStep  RN 1
35pAlphaArg   RN 2
36pBetaArg    RN 3
37
38pThresholds RN 14               ;// r14 is also the link register, so this value is reloaded from the stack after each BL
39pBS         RN 9
40pQ0         RN 0
41bS          RN 2
42
43alpha       RN 6
44alpha0      RN 6
45alpha1      RN 8
46
47beta        RN 7
48beta0       RN 7
49beta1       RN 9
50
51;// Declare Local/Temporary variables
52
53;// Pixels
;// Each pixel register holds one word loaded from one row (see the load
;// sequence at the top of LoopX); p_3 is the first row loaded, q_3 the last.
54p_0         RN 3
55p_1         RN 5
56p_2         RN 4
57p_3         RN 2
58q_0         RN 8
59q_1         RN 9
60q_2         RN 10
61q_3         RN 12
62
63;// Filtering
64
;// All five difference temporaries share r12; each is consumed by the SEL
;// that immediately follows it.
65dp0q0       RN 12
66dp1p0       RN 12
67dq1q0       RN 12
68dp2p0       RN 12
69dq2q0       RN 12
70
71ap0q0       RN 1
72filt        RN 2
73
74m00         RN 14
75m01         RN 11
76
77apflg       RN 0
78aqflg       RN 6
79apqflg      RN 0
80
81
82;//Declarations for bSLT4 kernel
83
84tC0         RN 7
85ptC0        RN 1
86
87pQ0a        RN 0
88Stepa       RN 1
89maska       RN 14               ;// not referenced in the visible code
90
;// Registers from which the results of the bS<4 kernel are stored back.
91P0a         RN 1
92P1a         RN 8
93Q0a         RN 7
94Q1a         RN 11
95
96;//Declarations for bSGE4 kernel
97
98pQ0b        RN 0
99Stepb       RN 1
100maskb       RN 14               ;// not referenced in the visible code
101
;// Registers from which the results of the bS>=4 kernel are stored back
;// (P3b and Q3b are declared but not referenced in the visible code).
102P0b         RN 6
103P1b         RN 7
104P2b         RN 1
105P3b         RN 3
106
107Q0b         RN 9
108Q1b         RN 0
109Q2b         RN 2
110Q3b         RN 3
111
112;// Miscellaneous
;// Of the aliases below, t0, t5 and t8 are not referenced in the visible code.
113XY          RN 8
114t0          RN 3
115t1          RN 12
116t2          RN 14
117t7          RN 7
118t4          RN 4
119t5          RN 1
120t8          RN 6
121a           RN 0
122
123
124
125
;//------------------------------------------------------------------------
;// omxVCM4P10_FilterDeblockingLuma_HorEdge_I
;//
;// In-place filter over the eight rows that straddle each of four edges.
;// The outer loop (LoopY) visits four edges that are 4*srcdstStep bytes
;// apart, the first one at pSrcDst. The inner loop (LoopX) visits four
;// 4-byte column groups per edge. Each group consumes one bS byte and
;// advances the threshold pointer by one byte, so 16 groups are processed.
;//
;// Inputs:
;//   r0  pSrcDst     row 0 of the q side of the first edge; rows -4..+3 are read
;//   r1  srcdstStep  byte distance between rows
;//   r2  pAlphaArg   two bytes: [0] for the first edge, [1] for the other three
;//   r3  pBetaArg    two bytes, used the same way as alpha
;//   stack ppThresholdsArg  byte array, one entry per group (loaded as tC0 on the bS<4 path)
;//   stack ppBSArg          byte array, one bS per group
;// Returns:
;//   r0 = OMX_Sts_NoErr. No argument validation is performed here.
;//
;// NOTE(review): M_ALLOC4/8, M_START, M_ARG, M_LDR, M_STR, M_LDRD, M_STRD and
;// M_END are macros from armCOMM_s.h and are not visible in this file. The
;// comments below assume they behave like the plain instruction of the same
;// name addressing a stack slot, and that they leave the NZCV flags alone.
;//------------------------------------------------------------------------
126        ;// Allocate stack memory
        ;// NOTE(review): pXYBS and ppQ0Step are declared with 4 bytes each but are
        ;// accessed with M_STRD/M_LDRD (two registers). The code relies on ppBS
        ;// sitting directly after pXYBS, and pStep directly after ppQ0Step, so that
        ;// each pair forms one doubleword. Confirm against the M_ALLOC layout.
127        M_ALLOC4 ppThresholds,4
128        M_ALLOC4 pQ_3,4                     ;// written below, never read here; presumably read by the bS>=4 kernel - confirm
129        M_ALLOC4 pP_3,4                     ;// written below, never read here; presumably read by the bS>=4 kernel - confirm
130        M_ALLOC8 pAlphaBeta0,8              ;// alpha/beta pair reloaded after every group
131        M_ALLOC8 pAlphaBeta1,8              ;// alpha/beta pair copied into pAlphaBeta0 at the end of each edge
132        M_ALLOC8 pXYBS,4
133        M_ALLOC4 ppBS,4
134        M_ALLOC8 ppQ0Step,4
135        M_ALLOC4 pStep,4
136
137        ;// Function header
138        M_START omxVCM4P10_FilterDeblockingLuma_HorEdge_I, r11
139
140        ;//Input arguments on the stack
141        M_ARG   ppThresholdsArg, 4
142        M_ARG   ppBSArg, 4
143
144        LDR     t4,=MASK_1                  ;// t4 = 0x01010101
145
146        LDRB    alpha0, [pAlphaArg]
147        LDRB    beta0,  [pBetaArg]
148        LDRB    alpha1, [pAlphaArg,#1]
149        LDRB    beta1,  [pBetaArg,#1]
150
        ;// byte * 0x01010101 copies the byte into all four lanes, so the
        ;// USUB8 comparisons below can test four pixels at once.
151        MUL     alpha0, alpha0, t4
152        MUL     beta0, beta0, t4
153        MUL     alpha1, alpha1, t4
154        MUL     beta1, beta1, t4
155
156        M_STRD  alpha0, beta0, pAlphaBeta0
157        M_STRD  alpha1, beta1, pAlphaBeta1
158
        ;// alpha1/beta1 (r8/r9) are saved above, so r8/r9 can now become XY/pBS.
159        LDR     XY,=LOOP_COUNT
160        M_LDR   pBS, ppBSArg
161        M_LDR   pThresholds, ppThresholdsArg
162        M_STR   srcdstStep, pStep
163        M_STRD  XY, pBS, pXYBS              ;// stores XY at pXYBS and pBS in the following word (the ppBS slot, see note above)
164        SUB     pQ0, pQ0, srcdstStep, LSL #2    ;// step back four rows to the first (p_3) row
165        M_STR   pThresholds, ppThresholds
166LoopY
167LoopX
168;//---------------Load Pixels-------------------
        ;// Loads eight rows with post-increment, so pQ0 ends up eight rows
        ;// further on. The group's start address is saved first for the stores.
169        M_STR   pQ0, ppQ0Step
170        M_LDR   p_3, [pQ0], srcdstStep
171        M_LDR   p_2, [pQ0], srcdstStep
172        M_STR   p_3, pP_3                   ;// p_3 shares r2 with bS, so it is spilled before bS is loaded
173        LDRB    bS, [pBS], #1
174        M_STR   pBS, ppBS                   ;// save the advanced pBS before r9 is reused for q_1
175        M_LDR   p_1, [pQ0], srcdstStep
176        CMP     bS, #0                      ;// flags consumed by BEQ NoFilterBS0 after the remaining loads
177        M_LDR   p_0, [pQ0], srcdstStep
178        M_LDR   q_0, [pQ0], srcdstStep
179        M_LDR   q_1, [pQ0], srcdstStep
180        M_LDR   q_2, [pQ0], srcdstStep
181        M_LDR   q_3, [pQ0], srcdstStep
182        BEQ     NoFilterBS0
183        CMP     bS, #4                      ;// flags consumed by BLT bSLT4 below; USUB8 writes only the GE bits, SEL writes none
184        M_STR   q_3, pQ_3                   ;// q_3 shares r12 with the difference temporaries, so it is spilled here
185
186;//--------------Filtering Decision -------------------
        ;// Pattern used for every test below:
        ;//   USUB8 d, x, y ; USUB8 a, y, x ; SEL a, a, d   -> per-byte |x-y|
        ;//   USUB8 a, a, limit ; SEL r, m00, prev          -> byte = 0 where |x-y| >= limit, else prev
        ;// filt therefore ends with 0x01 in each lane that passes all tests.
187        LDR     m01, =MASK_1                ;//  01010101 mask
188        MOV     m00, #MASK_0                ;//  00000000 mask
189
190        ;// Check |p0-q0|<Alpha
191        USUB8   dp0q0, p_0, q_0
192        USUB8   a, q_0, p_0
193        SEL     ap0q0, a, dp0q0
194        USUB8   a, ap0q0, alpha
195        SEL     filt, m00, m01
196
197        ;// Check |p1-p0|<Beta
198        USUB8   dp1p0, p_1, p_0
199        USUB8   a, p_0, p_1
200        SEL     a, a, dp1p0
201        USUB8   a, a, beta
202        SEL     filt, m00, filt
203
204        ;// Check |q1-q0|<Beta
205        USUB8   dq1q0, q_1, q_0
206        USUB8   a, q_0, q_1
207        SEL     a, a, dq1q0
208        USUB8   a, a, beta
209        SEL     filt, m00, filt
210
211        ;// Check ap<Beta
212        USUB8   dp2p0, p_2, p_0
213        USUB8   a, p_0, p_2
214        SEL     a, a, dp2p0
215        USUB8   a, a, beta
216        SEL     apflg, m00, filt            ;// apflg = filt && (ap<beta)
217
218        ;// Check aq<Beta
        ;// t2 shares r14 with m00, so the zero mask is gone after this test.
        ;// t7 (r7, beta's register) is zeroed to stand in for it; the SEL that
        ;// uses the GE bits left here is the first instruction on either branch.
219        USUB8   dq2q0, q_2, q_0
220        USUB8   t2, q_0, q_2
221        SEL     t2, t2, dq2q0
222        USUB8   t2, t2, beta
223        MOV     t7,#0
224
225        BLT     bSLT4                       ;// uses the flags from CMP bS, #4
226;//-------------------Filter--------------------
227bSGE4
228        ;//---------bSGE4 Execution---------------
229        SEL     t1, t7, filt            ;// aqflg = filt && (aq<beta)
230        CMP     filt, #0
231        ORR     apqflg, apflg, t1, LSL #1   ;// per lane: bit 0 = ap flag, bit 1 = aq flag
232        M_LDRD  pQ0, srcdstStep, ppQ0Step, EQ   ;// no lane to filter: restore the group start and step, then skip
233        BEQ     NoFilterFilt0
234
        ;// NOTE(review): the kernel's register interface is defined elsewhere.
        ;// It is assumed to take the pixel, filt and apqflg registers set up
        ;// above and to return results in P0b/P1b/P2b/Q0b/Q1b/Q2b.
235        BL      armVCM4P10_DeblockingLumabSGE4_unsafe
236
237        ;//---------Store result---------------
238        M_LDR   pThresholds, ppThresholds
239        MOV     p_2, Q1b                    ;// Q1b is r0 and P2b is r1: move both out of the way
240        MOV     p_1, P2b                    ;// before r0/r1 are reloaded as pQ0b/Stepb
241        M_LDRD  pQ0b, Stepb, ppQ0Step
242        ADD     pThresholds, #1             ;// this path does not read a threshold but still advances the pointer
243        M_STR   pThresholds, ppThresholds
        ;// Pre-indexed stores with writeback, to rows 1..4 after the group
        ;// start; the last two stores use offsets only, to rows 5 and 6.
244        M_STR   p_1, [pQ0b, Stepb]!
245        M_STR   P1b, [pQ0b, Stepb]!
246        M_STR   P0b, [pQ0b, Stepb]!
247        M_STR   Q0b, [pQ0b, Stepb]!
248        STR     p_2, [pQ0b, Stepb]
249        STR     Q2b, [pQ0b, Stepb, LSL #1]
250
251
252        M_LDRD  XY, pBS, pXYBS
253        SUB     pQ0, pQ0b, Stepb, LSL #2    ;// back to the group start
254        ADD     pQ0, pQ0, #4                ;// next 4-byte column group
255        M_LDRD  alpha, beta, pAlphaBeta0
256        ADDS    XY, XY, XY                  ;// carry set after every 4th group; zero after the 16th
257        M_STR   XY, pXYBS
258        BCC     LoopX
259        B       ExitLoopY
260
261;//---------- Exit of LoopX --------------
262;//---- for the case of no filtering -----
263
264NoFilterBS0
265        SUB     pQ0, pQ0, srcdstStep, LSL #3    ;// undo the eight post-incremented loads
266NoFilterFilt0
267        ADD     pQ0, pQ0, #4
268        ;// Load counter for LoopX
269        M_LDRD  XY, pBS, pXYBS
270        M_LDR   pThresholds, ppThresholds
271        M_LDRD  alpha, beta, pAlphaBeta0
272
273        ;// Align the pointers
274        ADDS    XY, XY, XY
275        ADD     pThresholds, pThresholds, #1    ;// skip this group's threshold; ADD leaves the ADDS flags intact
276        M_STR   pThresholds, ppThresholds
277        M_STR   XY, pXYBS
278        BCC     LoopX
279        B       ExitLoopY
280
281bSLT4
282        ;//---------bSLT4 Execution---------------
283        SEL     aqflg, t7, filt            ;// aqflg = filt && (aq<beta)
284        M_LDR   ptC0, ppThresholds
285        CMP     filt, #0
286        M_LDRD  pQ0, srcdstStep, ppQ0Step, EQ   ;// no lane to filter: restore the group start and step, then skip
287        BEQ     NoFilterFilt0
288
289        LDRB    tC0, [ptC0], #1             ;// one threshold byte per group
290        M_STR   ptC0, ppThresholds
291
        ;// NOTE(review): the kernel's register interface is defined elsewhere.
        ;// It is assumed to take the pixel, filt, apflg, aqflg and tC0 registers
        ;// set up above and to return results in P0a/P1a/Q0a/Q1a.
292        BL      armVCM4P10_DeblockingLumabSLT4_unsafe
293
294        ;//---------Store result---------------
295        MOV     p_2, P0a                    ;// P0a is r1, about to be reloaded as Stepa
296        M_LDRD  pQ0a, Stepa, ppQ0Step
        ;// Stores go to rows 2..5 after the group start; the first three use
        ;// writeback, leaving pQ0a four rows past the group start.
297        M_STR   P1a, [pQ0a, Stepa, LSL #1]!
298        M_STR   p_2, [pQ0a, Stepa]!
299        M_STR   Q0a, [pQ0a, Stepa]!
300        STR     Q1a, [pQ0a, Stepa]
301
302        ;// Load counter
303        M_LDRD  XY, pBS, pXYBS
304        M_LDRD  alpha, beta, pAlphaBeta0
305
306        SUB     pQ0, pQ0a, Stepa, LSL #2
307        ADD     pQ0, pQ0, #4
308
309        ADDS    XY, XY, XY
310        M_STR   XY, pXYBS
311        BCC     LoopX
312
313;//-------- Common Exit of LoopY -----------------
314        ;// Align the pointers
        ;// Reached with the flags of the last ADDS XY still live: nothing
        ;// before the BNE below may write NZCV. Z is set only once XY has
        ;// been shifted out completely, i.e. after the fourth edge.
315ExitLoopY
316        M_LDRD  alpha, beta, pAlphaBeta1
317        SUB     pQ0, pQ0, #16               ;// undo the four +4 column advances
318        ADD     pQ0, pQ0, srcdstStep, LSL #2    ;// move down four rows to the next edge
319        M_STRD  alpha, beta, pAlphaBeta0    ;// later per-group reloads now pick up the second alpha/beta pair
320
321        BNE     LoopY
322        MOV     r0, #OMX_Sts_NoErr
323;//-----------------End Filter--------------------
324        M_END
325
326    ENDIF
327
328
329        END
330
331