omxVCM4P10_FilterDeblockingLuma_HorEdge_I_s.s revision 78e52bfac041d71ce53b5b13c2abf78af742b09d
1;//
2;// Copyright (C) 2007-2008 ARM Limited
3;//
4;// Licensed under the Apache License, Version 2.0 (the "License");
5;// you may not use this file except in compliance with the License.
6;// You may obtain a copy of the License at
7;//
8;//      http://www.apache.org/licenses/LICENSE-2.0
9;//
10;// Unless required by applicable law or agreed to in writing, software
11;// distributed under the License is distributed on an "AS IS" BASIS,
12;// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13;// See the License for the specific language governing permissions and
14;// limitations under the License.
15;//
16;//
17;//
18;// File Name:  omxVCM4P10_FilterDeblockingLuma_HorEdge_I_s.s
19;// OpenMAX DL: v1.0.2
20;// Revision:   9641
21;// Date:       Thursday, February 7, 2008
22;//
23;//
24;//
25;//
26
27        INCLUDE omxtypes_s.h
28        INCLUDE armCOMM_s.h
29
30        M_VARIANTS ARM1136JS
31
32        IMPORT  armVCM4P10_DeblockingLumabSLT4_unsafe
33        IMPORT  armVCM4P10_DeblockingLumabSGE4_unsafe
34
35
36
37    IF ARM1136JS
38
39
;// Constants used by the edge filter below.
40MASK_0      EQU 0x00000000                  ;// all-zero word: per-byte "condition failed" value fed to SEL
41MASK_1      EQU 0x01010101                  ;// 0x01 in every byte lane: multiplier that replicates a byte into all four lanes, and initial per-byte "filter" flag
42MASK_2      EQU 0xff00ff00                  ;// not referenced in the visible part of this file
43LOOP_COUNT  EQU 0x11110000                  ;// shift-register loop counter: doubled once per LoopX pass; a carry-out (every 4th doubling) ends LoopX, a zero result (16th doubling) ends LoopY
44
;// Register aliases (RN) for the whole routine.
;// NOTE: many names below map to the SAME physical register. An alias only
;// documents what the register holds at one point in the code; writing one
;// alias destroys every other alias of that register. The routine spills
;// values to its stack slots before a shared register is reused.
45;// Declare input registers
46
47pSrcDst     RN 0                            ;// 1st argument; the code refers to it as pQ0
48srcdstStep  RN 1                            ;// 2nd argument: offset added to the pointer between consecutive row loads
49pAlphaArg   RN 2                            ;// 3rd argument: bytes [0] and [1] are read
50pBetaArg    RN 3                            ;// 4th argument: bytes [0] and [1] are read
51
52pThresholds RN 14                           ;// r14 (lr) reused as a data register
53pBS         RN 9                            ;// shares r9 with q_1 / beta1 / Q0b
54pQ0         RN 0                            ;// running pixel pointer; shares r0 with a / apflg / apqflg
55bS          RN 2                            ;// shares r2 with p_3 / filt
56
57alpha       RN 6
58alpha0      RN 6
59alpha1      RN 8
60
61beta        RN 7
62beta0       RN 7
63beta1       RN 9
64
65;// Declare Local/Temporary variables
66
67;// Pixels: one 32-bit word (four bytes) per row
68p_0         RN 3
69p_1         RN 5
70p_2         RN 4
71p_3         RN 2
72q_0         RN 8
73q_1         RN 9
74q_2         RN 10
75q_3         RN 12
76
77;// Filtering
78
;// All difference temporaries share r12 (also q_3 and t1).
79dp0q0       RN 12
80dp1p0       RN 12
81dq1q0       RN 12
82dp2p0       RN 12
83dq2q0       RN 12
84
85ap0q0       RN 1                            ;// overwrites srcdstStep
86filt        RN 2                            ;// overwrites bS
87
88m00         RN 14                           ;// holds MASK_0
89m01         RN 11                           ;// holds MASK_1
90
91apflg       RN 0
92aqflg       RN 6
93apqflg      RN 0
94
95
96;//Declarations for bSLT4 kernel
;// NOTE(review): presumably these mirror the register interface of
;// armVCM4P10_DeblockingLumabSLT4_unsafe - confirm against that file.
97
98tC0         RN 7
99ptC0        RN 1
100
101pQ0a        RN 0
102Stepa       RN 1
103maska       RN 14                           ;// not referenced in the visible part of this file
104
105P0a         RN 1
106P1a         RN 8
107Q0a         RN 7
108Q1a         RN 11
109
110;//Declarations for bSGE4 kernel
;// NOTE(review): presumably these mirror the register interface of
;// armVCM4P10_DeblockingLumabSGE4_unsafe - confirm against that file.
111
112pQ0b        RN 0
113Stepb       RN 1
114maskb       RN 14                           ;// not referenced in the visible part of this file
115
116P0b         RN 6
117P1b         RN 7
118P2b         RN 1
119P3b         RN 3                            ;// not referenced in the visible part of this file
120
121Q0b         RN 9
122Q1b         RN 0
123Q2b         RN 2
124Q3b         RN 3                            ;// not referenced in the visible part of this file
125
126;// Miscellaneous
127XY          RN 8                            ;// loop counter (see LOOP_COUNT); shares r8 with q_0 / alpha1
128t0          RN 3                            ;// not referenced in the visible part of this file
129t1          RN 12
130t2          RN 14                           ;// overwrites m00
131t7          RN 7                            ;// overwrites beta
132t4          RN 4
133t5          RN 1                            ;// not referenced in the visible part of this file
134t8          RN 6                            ;// not referenced in the visible part of this file
135a           RN 0                            ;// scratch; overwrites pQ0
136
137
138
139
        ;//-------------------------------------------------------------------
        ;// omxVCM4P10_FilterDeblockingLuma_HorEdge_I
        ;//
        ;// In-place deblocking of horizontal luma edges.
        ;//
        ;// In:   r0       pSrcDst     - pixel pointer; the code starts reading
        ;//                              four rows (4*srcdstStep) before it
        ;//       r1       srcdstStep  - offset between consecutive rows
        ;//       r2       pAlphaArg   - alpha[0] used for the first LoopY pass,
        ;//                              alpha[1] for the remaining passes
        ;//       r3       pBetaArg    - beta[0] / beta[1], used like alpha
        ;//       [stack]  ppThresholdsArg - byte array; one entry per LoopX pass
        ;//       [stack]  ppBSArg         - byte array; one entry per LoopX pass
        ;// Out:  r0 = OMX_Sts_NoErr (no argument checking is done here)
        ;//
        ;// Structure: LoopX handles one 32-bit word (four adjacent bytes) of
        ;// each of eight consecutive rows p3,p2,p1,p0,q0,q1,q2,q3 and then
        ;// moves 4 bytes to the right; after four LoopX passes (16 bytes)
        ;// LoopY moves down four rows. LoopY runs four times in total.
        ;//
        ;// Per LoopX pass:
        ;//   bS == 0            -> nothing is written
        ;//   bS <  4            -> armVCM4P10_DeblockingLumabSLT4_unsafe,
        ;//                         rows p1,p0,q0,q1 are written back
        ;//   bS >= 4            -> armVCM4P10_DeblockingLumabSGE4_unsafe,
        ;//                         rows p2,p1,p0,q0,q1,q2 are written back
        ;//   In both filter cases nothing is written when no byte lane passes
        ;//   the alpha/beta tests (filt == 0).
        ;//
        ;// NOTE(review): the register/stack contract with the two _unsafe
        ;// kernels is defined outside this file; comments about it below are
        ;// assumptions to be confirmed against those kernels.
        ;//-------------------------------------------------------------------
140        ;// Allocate stack memory
141        M_ALLOC4 ppThresholds,4                ;// current thresholds pointer
142        M_ALLOC4 pQ_3,4                        ;// q_3 word; written here, never read back in this file (presumably read by the bSGE4 kernel - confirm)
143        M_ALLOC4 pP_3,4                        ;// p_3 word; written here, never read back in this file (presumably read by the bSGE4 kernel - confirm)
144        M_ALLOC8 pAlphaBeta0,8                 ;// {alpha, beta} for the edge currently being filtered
145        M_ALLOC8 pAlphaBeta1,8                 ;// {alpha1, beta1}, copied into pAlphaBeta0 at ExitLoopY
        ;// pXYBS is accessed with M_STRD/M_LDRD as the register pair {XY, pBS},
        ;// while ppBS is updated on its own with M_STR. The same pattern is used
        ;// for ppQ0Step ({pQ0, srcdstStep}) and pStep.
        ;// NOTE(review): this appears to rely on each pair of slots being laid
        ;// out contiguously by M_ALLOC - confirm in armCOMM_s.h.
146        M_ALLOC8 pXYBS,4
147        M_ALLOC4 ppBS,4
148        M_ALLOC8 ppQ0Step,4
149        M_ALLOC4 pStep,4
150
151        ;// Function header
        ;// NOTE(review): the r11 argument presumably tells M_START which
        ;// registers to preserve - see armCOMM_s.h.
152        M_START omxVCM4P10_FilterDeblockingLuma_HorEdge_I, r11
153
154        ;//Input arguments on the stack
155        M_ARG   ppThresholdsArg, 4
156        M_ARG   ppBSArg, 4
157
158        LDR     t4,=MASK_1
159
        ;// Load the two alpha and two beta bytes.
160        LDRB    alpha0, [pAlphaArg]
161        LDRB    beta0,  [pBetaArg]
162        LDRB    alpha1, [pAlphaArg,#1]
163        LDRB    beta1,  [pBetaArg,#1]
164
        ;// byte * 0x01010101 copies the byte into all four lanes so the
        ;// thresholds can be compared against four pixels at once.
165        MUL     alpha0, alpha0, t4
166        MUL     beta0, beta0, t4
167        MUL     alpha1, alpha1, t4
168        MUL     beta1, beta1, t4
169
        ;// Spill both pairs: r8/r9 (alpha1/beta1) are reused as XY/pBS right
        ;// below, and alpha/beta (r6/r7) are reused inside the loop.
170        M_STRD  alpha0, beta0, pAlphaBeta0
171        M_STRD  alpha1, beta1, pAlphaBeta1
172
173        LDR     XY,=LOOP_COUNT
174        M_LDR   pBS, ppBSArg
175        M_LDR   pThresholds, ppThresholdsArg
176        M_STR   srcdstStep, pStep
177        M_STRD  XY, pBS, pXYBS
178        SUB     pQ0, pQ0, srcdstStep, LSL #2    ;// step back four rows: pQ0 now addresses row p3
179        M_STR   pThresholds, ppThresholds
        ;// On the first pass alpha/beta (r6/r7) still hold alpha0/beta0 from
        ;// the MULs above; every later pass reloads them before branching back.
180LoopY
181LoopX
182;//---------------Load Pixels-------------------
        ;// Eight post-indexed word loads walk pQ0 down rows p3..q3, leaving it
        ;// eight rows below the saved start address.
183        M_STR   pQ0, ppQ0Step                   ;// save the row-p3 address; r0 is reused as scratch below
184        M_LDR   p_3, [pQ0], srcdstStep
185        M_LDR   p_2, [pQ0], srcdstStep
186        M_STR   p_3, pP_3                       ;// spill p_3: its register (r2) is about to receive bS
187        LDRB    bS, [pBS], #1                   ;// one bS byte per pass
188        M_STR   pBS, ppBS                       ;// save advanced pBS: its register (r9) is about to receive q_1
189        M_LDR   p_1, [pQ0], srcdstStep
190        CMP     bS, #0                          ;// flags consumed by BEQ NoFilterBS0 below
191        M_LDR   p_0, [pQ0], srcdstStep
192        M_LDR   q_0, [pQ0], srcdstStep
193        M_LDR   q_1, [pQ0], srcdstStep
194        M_LDR   q_2, [pQ0], srcdstStep
195        M_LDR   q_3, [pQ0], srcdstStep
196        BEQ     NoFilterBS0
        ;// These flags stay live until "BLT bSLT4": the instructions in between
        ;// (M_STR, LDR, MOV, USUB8, SEL) are relied on not to change N/Z/C/V.
197        CMP     bS, #4
198        M_STR   q_3, pQ_3                       ;// spill q_3: its register (r12) is reused for the difference temporaries
199
200;//--------------Filtering Decision -------------------
        ;// Pattern used for every test below, per byte lane:
        ;//   USUB8 x-y ; USUB8 y-x ; SEL          -> |x-y|
        ;//   USUB8 |x-y|-threshold                -> GE bit set when |x-y| >= threshold
        ;//   SEL   filt, m00, filt                -> lane cleared when GE is set
        ;// so a lane of filt stays 0x01 only while every test has passed.
201        LDR     m01, =MASK_1                ;//  01010101 mask
202        MOV     m00, #MASK_0                ;//  00000000 mask
203
204        ;// Check |p0-q0|<Alpha
205        USUB8   dp0q0, p_0, q_0
206        USUB8   a, q_0, p_0                     ;// overwrites pQ0 (r0); it was saved in ppQ0Step
207        SEL     ap0q0, a, dp0q0                 ;// overwrites srcdstStep (r1); reloaded from ppQ0Step/pStep later
208        USUB8   a, ap0q0, alpha
209        SEL     filt, m00, m01                  ;// overwrites bS (r2); the CMP bS,#4 flags are already set
210
211        ;// Check |p1-p0|<Beta
212        USUB8   dp1p0, p_1, p_0
213        USUB8   a, p_0, p_1
214        SEL     a, a, dp1p0
215        USUB8   a, a, beta
216        SEL     filt, m00, filt
217
218        ;// Check |q1-q0|<Beta
219        USUB8   dq1q0, q_1, q_0
220        USUB8   a, q_0, q_1
221        SEL     a, a, dq1q0
222        USUB8   a, a, beta
223        SEL     filt, m00, filt
224
225        ;// Check ap<Beta, where ap = |p2-p0|
226        USUB8   dp2p0, p_2, p_0
227        USUB8   a, p_0, p_2
228        SEL     a, a, dp2p0
229        USUB8   a, a, beta
230        SEL     apflg, m00, filt            ;// apflg = filt && (ap<beta)
231
232        ;// Check aq<Beta, where aq = |q2-q0|
        ;// Only the GE bits are produced here; the matching SEL is the first
        ;// instruction of bSGE4 / bSLT4.
233        USUB8   dq2q0, q_2, q_0
234        USUB8   t2, q_0, q_2                    ;// t2 overwrites m00 (r14)
235        SEL     t2, t2, dq2q0
236        USUB8   t2, t2, beta
237        MOV     t7,#0                           ;// replacement zero mask (m00 is gone); overwrites beta (r7), last used on the line above
238
239        BLT     bSLT4                           ;// uses the flags from CMP bS,#4
240;//-------------------Filter--------------------
241bSGE4
242        ;//---------bSGE4 Execution---------------
243        SEL     t1, t7, filt            ;// t1 = filt && (aq<beta), i.e. the aq flag
244        CMP     filt, #0
245        ORR     apqflg, apflg, t1, LSL #1       ;// per byte lane: bit0 = ap flag, bit1 = aq flag
246        M_LDRD  pQ0, srcdstStep, ppQ0Step, EQ   ;// no lane to filter: restore pointer and step for the skip path
247        BEQ     NoFilterFilt0
248
249        BL      armVCM4P10_DeblockingLumabSGE4_unsafe
250
251        ;//---------Store result---------------
        ;// Results are read from P2b,P1b,P0b,Q0b,Q1b,Q2b. Q1b (r0) and P2b (r1)
        ;// are copied away first because M_LDRD reloads r0/r1.
252        M_LDR   pThresholds, ppThresholds
253        MOV     p_2, Q1b
254        MOV     p_1, P2b
255        M_LDRD  pQ0b, Stepb, ppQ0Step           ;// pQ0b = row-p3 address, Stepb = row step
256        ADD     pThresholds, #1                 ;// this path does not read its threshold byte, but still consumes one entry
257        M_STR   pThresholds, ppThresholds
258        M_STR   p_1, [pQ0b, Stepb]!             ;// row p2 <- P2b
259        M_STR   P1b, [pQ0b, Stepb]!             ;// row p1
260        M_STR   P0b, [pQ0b, Stepb]!             ;// row p0
261        M_STR   Q0b, [pQ0b, Stepb]!             ;// row q0
262        STR     p_2, [pQ0b, Stepb]              ;// row q1 <- Q1b (no write-back)
263        STR     Q2b, [pQ0b, Stepb, LSL #1]      ;// row q2 (no write-back)
264
265
        ;// pQ0b sits on row q0 (start + 4 rows): go back to row p3, then move
        ;// 4 bytes right for the next word.
266        M_LDRD  XY, pBS, pXYBS
267        SUB     pQ0, pQ0b, Stepb, LSL #2
268        ADD     pQ0, pQ0, #4
269        M_LDRD  alpha, beta, pAlphaBeta0
270        ADDS    XY, XY, XY                      ;// C set -> row of four words done; Z set -> all done (tested at ExitLoopY)
271        M_STR   XY, pXYBS
272        BCC     LoopX
273        B       ExitLoopY
274
275;//---------- Exit of LoopX --------------
276;//---- for the case of no filtering -----
277
278NoFilterBS0
279        SUB     pQ0, pQ0, srcdstStep, LSL #3    ;// undo the eight post-indexed row loads
280NoFilterFilt0
        ;// Entered with pQ0 = row-p3 address and srcdstStep valid.
281        ADD     pQ0, pQ0, #4
282        ;// Load counter for LoopX
283        M_LDRD  XY, pBS, pXYBS
284        M_LDR   pThresholds, ppThresholds
285        M_LDRD  alpha, beta, pAlphaBeta0
286
287        ;// Advance the loop counter and skip this pass's threshold byte
288        ADDS    XY, XY, XY
289        ADD     pThresholds, pThresholds, #1
290        M_STR   pThresholds, ppThresholds
291        M_STR   XY, pXYBS
292        BCC     LoopX
293        B       ExitLoopY
294
295bSLT4
296        ;//---------bSLT4 Execution---------------
297        SEL     aqflg, t7, filt            ;// aqflg = filt && (aq<beta)
298        M_LDR   ptC0, ppThresholds
299        CMP     filt, #0
300        M_LDRD  pQ0, srcdstStep, ppQ0Step, EQ   ;// no lane to filter: restore pointer and step for the skip path
301        BEQ     NoFilterFilt0
302
303        LDRB    tC0, [ptC0], #1                 ;// threshold byte for this pass
304        M_STR   ptC0, ppThresholds
305
306        BL      armVCM4P10_DeblockingLumabSLT4_unsafe
307
308        ;//---------Store result---------------
        ;// Results are read from P1a,P0a,Q0a,Q1a. P0a (r1) is copied away first
        ;// because M_LDRD reloads r0/r1.
309        MOV     p_2, P0a
310        M_LDRD  pQ0a, Stepa, ppQ0Step
311        M_STR   P1a, [pQ0a, Stepa, LSL #1]!     ;// row p1 (start + 2 rows)
312        M_STR   p_2, [pQ0a, Stepa]!             ;// row p0 <- P0a
313        M_STR   Q0a, [pQ0a, Stepa]!             ;// row q0
314        STR     Q1a, [pQ0a, Stepa]              ;// row q1 (no write-back)
315
316        ;// Load counter
317        M_LDRD  XY, pBS, pXYBS
318        M_LDRD  alpha, beta, pAlphaBeta0
319
        ;// pQ0a sits on row q0 (start + 4 rows): back to row p3, then 4 bytes right.
320        SUB     pQ0, pQ0a, Stepa, LSL #2
321        ADD     pQ0, pQ0, #4
322
323        ADDS    XY, XY, XY
324        M_STR   XY, pXYBS
325        BCC     LoopX
326
327;//-------- Common Exit of LoopY -----------------
328        ;// Move to the next edge: back 16 bytes to the left, down four rows,
        ;// and make {alpha1, beta1} the active thresholds for all later edges.
        ;// None of these instructions alter the flags, so BNE still tests the
        ;// Z result of the last "ADDS XY, XY, XY".
329ExitLoopY
330        M_LDRD  alpha, beta, pAlphaBeta1
331        SUB     pQ0, pQ0, #16
332        ADD     pQ0, pQ0, srcdstStep, LSL #2
333        M_STRD  alpha, beta, pAlphaBeta0
334
335        BNE     LoopY
336        MOV     r0, #OMX_Sts_NoErr
337;//-----------------End Filter--------------------
338        M_END
339
340    ENDIF
341
342
343        END
344
345
346