omxVCM4P10_FilterDeblockingLuma_VerEdge_I_s.S revision 78e52bfac041d71ce53b5b13c2abf78af742b09d
1/*
2 * Copyright (C) 2007-2008 ARM Limited
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 *
16 */
17/*
18 *
19 */
20
/* Assembler set-up for this translation unit: EABI build attributes, */
/* instruction set, FPU/SIMD extension and output section.            */
21    .eabi_attribute 24, 1    /* Tag_ABI_align_needed = 1: code relies on 8-byte alignment of 8-byte data */
22    .eabi_attribute 25, 1    /* Tag_ABI_align_preserved = 1: code keeps 8-byte alignment of 8-byte data */
23
24    .arm                     /* assemble as ARM (A32) instructions, not Thumb */
25    .fpu neon                /* allow the NEON (Advanced SIMD) instructions used below */
26    .text                    /* emit into the code section */
27
/*
 * omxVCM4P10_FilterDeblockingLuma_VerEdge_I
 *
 * In-place filter across four vertical lines spaced 4 bytes apart, over
 * 16 rows.  The work is done on 8x8-byte tiles: each tile is loaded as
 * 8 rows, transposed so that every D register holds one pixel column,
 * handed to one of two external helpers, transposed back and stored.
 * An inner loop walks 4 tiles left to right (r0 += 4 per tile); an outer
 * loop runs twice, the second time 8 rows further down.
 *
 * Registers at entry, as used by this code:
 *   r0        byte pointer into the image.  4 is subtracted so that each
 *             tile covers the 4 columns left and the 4 columns right of it.
 *   r1        row stride in bytes.
 *   r2        pointer to 2 bytes: byte [0] is broadcast into d0 for the
 *             first tile of each pass, byte [1] for the other three tiles.
 *   r3        same layout as r2, broadcast into d2.
 *   [sp,#0]   pointer kept in r5.  Never dereferenced here; it is only
 *             advanced, so it is presumably read by the helpers.
 *   [sp,#4]   pointer kept in r4.  One halfword is read per tile, at byte
 *             offset 4*tile + 2*pass; a zero halfword skips the tile.
 * NOTE(review): these presumably correspond to pSrcDst, srcdstStep, pAlpha,
 * pBeta, pThresholds and pBS of the OpenMAX DL API - confirm against the
 * public header.
 *
 * Returns:  r0 = 0.
 * Preserves r4-r12 and d8-d15 (pushed/popped here).  Overwrites d0-d7,
 * d16-d19 and whatever the helpers write (this code reads d24, d25 and
 * d28-d31 back from them).
 *
 * Loop control: r9 starts at 0x11000000 and is doubled once per tile with
 * ADDS.  The 4th doubling carries out (C=1) and leaves r9 = 0x10000000
 * (Z=0, so a second pass is run); the 8th doubling carries out again and
 * leaves r9 = 0 (Z=1, so the function returns).
 */
28    .global omxVCM4P10_FilterDeblockingLuma_VerEdge_I
29    .func   omxVCM4P10_FilterDeblockingLuma_VerEdge_I
30omxVCM4P10_FilterDeblockingLuma_VerEdge_I:
31    PUSH     {r4-r12,lr}              /* 10 core registers = 40 bytes */
32    VPUSH    {d8-d15}                 /* 8 D registers = 64 bytes; sp is now 0x68 below its entry value */
33    ADD      r7,r2,#1                 /* r7 -> second byte of the r2 table */
34    ADD      r8,r3,#1                 /* r8 -> second byte of the r3 table */
35    VLD1.8   {d0[]},[r2]              /* d0 = first r2 byte replicated to all 8 lanes */
36    SUB      r0,r0,#4                 /* start each tile 4 columns to the left of the line */
37    VLD1.8   {d2[]},[r3]              /* d2 = first r3 byte replicated to all 8 lanes */
38    LDR      r4,[sp,#0x6c]            /* r4 = second stack word at entry (0x68 + 4) */
39    LDR      r5,[sp,#0x68]            /* r5 = first stack word at entry */
40    MOV      r6,#0                    /* zero source for the conditional lane clears below */
41    VMOV.I8  d14,#0                   /* d14/d15 are not read in this function; */
42    VMOV.I8  d15,#0x1                 /* presumably constants expected by the helpers - confirm */
43    MOV      r9,#0x11000000           /* two-level loop counter, see header */
44    ADD      r11,r1,r1                /* r11 = 2 * stride: step between alternate rows */
/* ---- Per-tile loop ---------------------------------------------------- */
45L0x38:
46    LDRH     r12,[r4],#4              /* r12 = halfword for this tile; r4 += 4 */
47    CMP      r12,#0
48    BEQ      L0x160                   /* both bytes zero: leave the tile untouched */
/* Load 8 rows of 8 bytes.  r0 walks rows 0,2,4,6 and r10 rows 1,3,5,7. */
/* The VZIP.8 / VZIP.16 / VTRN.32 network interleaved with the loads is */
/* an 8x8 byte transpose.                                               */
49    ADD      r10,r0,r1                /* r10 -> row 1 */
50    VLD1.8   {d7},[r0],r11            /* row 0 */
51    VLD1.8   {d8},[r10],r11           /* row 1 */
52    VLD1.8   {d5},[r0],r11            /* row 2 */
53    VZIP.8   d7,d8
54    VLD1.8   {d10},[r10],r11          /* row 3 */
55    VLD1.8   {d6},[r0],r11            /* row 4 */
56    VZIP.8   d5,d10
57    VLD1.8   {d9},[r10],r11           /* row 5 */
58    VLD1.8   {d4},[r0],r11            /* row 6 */
59    VLD1.8   {d11},[r10],r11          /* row 7 */
60    VZIP.8   d6,d9
61    VZIP.16  d8,d10
62    VZIP.8   d4,d11
63    SUB      r0,r0,r1,LSL #3          /* r0 advanced 4 * (2*stride); rewind to row 0 */
64    VZIP.16  d7,d5
65    VZIP.16  d9,d11
66    VZIP.16  d6,d4
67    VTRN.32  d8,d9
68    VTRN.32  d5,d4
69    VTRN.32  d10,d11
70    VTRN.32  d7,d6
/* Columns after the transpose (one byte lane per row):                 */
/*   d7 d6 d5 d4 = columns 0..3 (left of the line, d4 adjacent to it)   */
/*   d8 d9 d10 d11 = columns 4..7 (right of the line, d8 adjacent)      */
/* In H.264 deblocking notation these would be p3 p2 p1 p0 | q0 q1 q2   */
/* q3 - presumably; the names are not used in this file.                */
/* The TST / VMOVEQ pairs below rely on the APSR flags surviving the    */
/* NEON arithmetic scheduled between them.                              */
71    VABD.U8  d13,d4,d8                /* |col3 - col4|: step across the line */
72    VABD.U8  d12,d5,d4                /* |col2 - col3| */
73    VABD.U8  d18,d9,d8                /* |col5 - col4| */
74    VABD.U8  d19,d6,d4                /* |col1 - col3| */
75    TST      r12,#0xff                /* Z set if the low byte of r12 is zero */
76    VCGT.U8  d16,d0,d13               /* mask lanes where d0 > |col3 - col4| */
77    VMAX.U8  d12,d18,d12              /* max(|col5 - col4|, |col2 - col3|) */
78    VABD.U8  d17,d10,d8               /* |col6 - col4| */
79    VMOVEQ.32 d16[0],r6               /* low byte zero: clear mask lanes 0-3 (rows 0-3) */
80    TST      r12,#0xff00              /* Z set if the high byte of r12 is zero */
81    VCGT.U8  d19,d2,d19               /* d2 > |col1 - col3| */
82    VCGT.U8  d12,d2,d12               /* d2 > both neighbour differences */
83    VMOVEQ.32 d16[1],r6               /* high byte zero: clear mask lanes 4-7 (rows 4-7) */
84    VCGT.U8  d17,d2,d17               /* d2 > |col6 - col4| */
85    VAND     d16,d16,d12              /* d16 = combined per-row mask */
86    TST      r12,#4                   /* bit 2 of the low byte selects the helper */
87    VAND     d12,d16,d17              /* d12 = mask AND right-side (col6) condition */
88    VAND     d17,d16,d19              /* d17 = mask AND left-side (col1) condition */
89    BNE      L0x17c                   /* bit 2 set: use the bSGE4 helper */
/* ---- bit 2 clear: bSLT4 helper ---------------------------------------- */
90    BL       armVCM4P10_DeblockingLumabSLT4_unsafe
/* Transpose back to rows.  Columns 0..7 are taken from                  */
/* d7 d6 d30 d29 d24 d25 d10 d11, i.e. columns 2..5 come from the helper */
/* and columns 0, 1, 6, 7 are the registers loaded above.                */
91    VZIP.8   d7,d6
92    VZIP.8   d30,d29
93    VZIP.8   d24,d25
94    VZIP.8   d10,d11
95    VZIP.16  d7,d30
96    ADD      r10,r0,r1                /* r10 -> row 1 for the stores */
97    VZIP.16  d24,d10
98    VZIP.16  d25,d11
99    VZIP.16  d6,d29
100    VTRN.32  d7,d24
101    VTRN.32  d30,d10
102    VTRN.32  d6,d25
103    VTRN.32  d29,d11
104    VST1.8   {d7},[r0],r11            /* row 0 */
105    VST1.8   {d24},[r10],r11          /* row 1 */
106    VST1.8   {d30},[r0],r11           /* row 2 */
107    VST1.8   {d10},[r10],r11          /* row 3 */
108    VST1.8   {d6},[r0],r11            /* row 4 */
109    VST1.8   {d25},[r10],r11          /* row 5 */
110    ADDS     r9,r9,r9                 /* step loop counter; C and Z are consumed by the branches below */
111    VST1.8   {d29},[r0],r11           /* row 6 */
112    ADD      r5,r5,#2                 /* NOTE(review): other paths add 4; presumably the bSLT4 helper already advanced r5 by 2 - confirm */
113    VST1.8   {d11},[r10],r1           /* row 7; the r1 (not r11) write-back is harmless, r10 is recomputed before its next use */
114    SUB      r0,r0,r1,LSL #3          /* rewind r0 to row 0 */
115    VLD1.8   {d0[]},[r7]              /* remaining tiles of the pass use the second r2 byte */
116    ADD      r0,r0,#4                 /* next tile: 4 columns to the right */
117    VLD1.8   {d2[]},[r8]              /* and the second r3 byte */
118    BCC      L0x38                    /* no carry yet: more tiles in this pass */
119    B        L0x1f0                   /* carry out: pass finished */
/* ---- halfword was zero: skip the tile, keep pointers and counter in step */
120L0x160:
121    ADD      r0,r0,#4                 /* next tile */
122    ADDS     r9,r9,r9                 /* step loop counter */
123    VLD1.8   {d0[]},[r7]              /* second r2 byte for the following tile */
124    ADD      r5,r5,#4                 /* skip this tile's share of the r5 table */
125    VLD1.8   {d2[]},[r8]              /* second r3 byte for the following tile */
126    BCC      L0x38
127    B        L0x1f0
/* ---- bit 2 set: bSGE4 helper ------------------------------------------ */
128L0x17c:
129    BL       armVCM4P10_DeblockingLumabSGE4_unsafe
/* Transpose back to rows.  Columns 0..7 are taken from                  */
/* d7 d31 d30 d29 d24 d25 d28 d11, i.e. columns 1..6 come from the       */
/* helper and only columns 0 and 7 are the registers loaded above.       */
130    VZIP.8   d7,d31
131    VZIP.8   d30,d29
132    VZIP.8   d24,d25
133    VZIP.8   d28,d11
134    VZIP.16  d7,d30
135    ADD      r10,r0,r1                /* r10 -> row 1 for the stores */
136    VZIP.16  d24,d28
137    VZIP.16  d25,d11
138    VZIP.16  d31,d29
139    VTRN.32  d7,d24
140    VTRN.32  d30,d28
141    VTRN.32  d31,d25
142    VTRN.32  d29,d11
143    VST1.8   {d7},[r0],r11            /* row 0 */
144    VST1.8   {d24},[r10],r11          /* row 1 */
145    VST1.8   {d30},[r0],r11           /* row 2 */
146    VST1.8   {d28},[r10],r11          /* row 3 */
147    VST1.8   {d31},[r0],r11           /* row 4 */
148    VST1.8   {d25},[r10],r11          /* row 5 */
149    ADDS     r9,r9,r9                 /* step loop counter */
150    VST1.8   {d29},[r0],r11           /* row 6 */
151    ADD      r5,r5,#4                 /* advance r5 past this tile's entries */
152    VST1.8   {d11},[r10],r11          /* row 7 */
153    SUB      r0,r0,r1,LSL #3          /* rewind r0 to row 0 */
154    VLD1.8   {d0[]},[r7]              /* second r2 byte for the following tile */
155    ADD      r0,r0,#4                 /* next tile */
156    VLD1.8   {d2[]},[r8]              /* second r3 byte for the following tile */
157    BCC      L0x38                    /* no carry: more tiles; otherwise fall through */
/* ---- End of a 4-tile pass.  None of the instructions before the BNE   */
/* below set flags, so Z is still the one produced by the last ADDS.     */
158L0x1f0:
159    SUB      r4,r4,#0xe               /* r4 moved +16 over the pass; net +2 for the second pass */
160    SUB      r5,r5,#0xe               /* likewise for r5 (assuming +4 per tile on every path) */
161    SUB      r0,r0,#0x10              /* undo the four +4 column steps */
162    VLD1.8   {d0[]},[r2]              /* first tile of the next pass uses the first r2 byte again */
163    ADD      r0,r0,r1,LSL #3          /* move down 8 rows */
164    VLD1.8   {d2[]},[r3]              /* and the first r3 byte */
165    BNE      L0x38                    /* r9 != 0 after the last ADDS: run the second pass */
166    MOV      r0,#0                    /* return value 0 */
167    VPOP     {d8-d15}
168    POP      {r4-r12,pc}              /* restore core registers and return */
169    .endfunc
170
171    .end
172
173