omxVCM4P10_FilterDeblockingLuma_VerEdge_I_s.S revision 7ea582e1dbdd9a88b2105fbe29ed0ec92cbf70c6
1/*
2 * Copyright (C) 2007-2008 ARM Limited
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 *
16 */
17/*
18 *
19 */
20
/* Build attributes and assembler mode for this translation unit. */
21    .eabi_attribute 24, 1          /* Tag_ABI_align_needed = 1: code relies on 8-byte stack alignment */
22    .eabi_attribute 25, 1          /* Tag_ABI_align_preserved = 1: code keeps the stack 8-byte aligned */
23
24    .arm                           /* assemble as ARM (A32) instructions, not Thumb */
25    .fpu neon                      /* allow Advanced SIMD (NEON) mnemonics */
26    .text                          /* emit into the code section */
27
28    .global omxVCM4P10_FilterDeblockingLuma_VerEdge_I   /* export the entry point */
/*
 * omxVCM4P10_FilterDeblockingLuma_VerEdge_I
 *
 * Runs eight filter steps: two bands of 8 rows, and within each band four
 * positions spaced 4 bytes apart, starting at the address passed in r0.
 * Each step loads the 8x8 byte tile that covers 4 bytes on either side of
 * the current position, transposes it so that every D register holds one
 * pixel column, builds per-row masks from the column differences, calls one
 * of two external helpers, transposes the result back and stores the tile
 * in place.  A step whose control halfword is zero is skipped entirely.
 *
 * Inputs as used below (presumably pSrcDst, srcdstStep, pAlpha, pBeta,
 * pThresholds, pBS of the OpenMAX DL prototype; TODO confirm against the
 * header):
 *   r0         byte pointer to the data, read and written in place
 *   r1         row stride in bytes
 *   r2         pointer to two bytes; byte 0 is broadcast to d0 for the first
 *              position of each band, byte 1 for the other three
 *   r3         same layout as r2, broadcast to d2
 *   [sp, #0]   pointer kept in r5; never dereferenced in this function,
 *              only advanced (presumably read by the SLT4 helper)
 *   [sp, #4]   pointer kept in r4; one halfword is read per step
 *
 * Returns 0 in r0.  r4-r12, lr and d8-d15 are saved on entry and restored
 * on exit.
 */
29omxVCM4P10_FilterDeblockingLuma_VerEdge_I:
30    PUSH     {r4-r12,lr}           /* save 10 core registers (40 bytes) */
31    VPUSH    {d8-d15}              /* save 8 D registers (64 bytes); 0x68 bytes pushed in total */
32    ADD      r7,r2,#1              /* r7 = address of the second byte behind r2 */
33    ADD      r8,r3,#1              /* r8 = address of the second byte behind r3 */
34    VLD1.8   {d0[]},[r2]           /* broadcast byte [r2] to all 8 lanes of d0 */
35    SUB      r0,r0,#4              /* back up 4 bytes so an 8-byte row load straddles the position */
36    VLD1.8   {d2[]},[r3]           /* broadcast byte [r3] to all 8 lanes of d2 */
37    LDR      r4,[sp,#0x6c]         /* second stack argument (0x68 bytes of saved registers + 4) */
38    LDR      r5,[sp,#0x68]         /* first stack argument */
39    MOV      r6,#0                 /* zero word used to clear mask halves below */
40    VMOV.I8  d14,#0                /* constant not read in this function; presumably for the helpers */
41    VMOV.I8  d15,#0x1              /* constant not read in this function; presumably for the helpers */
42    MOV      r9,#0x11000000        /* step counter: doubling gives carry on steps 4 and 8, zero on step 8 */
43    ADD      r11,r1,r1             /* r11 = 2 * stride; r0 and r10 each walk every other row */
44L0x38:                             /* ---- top of one filter step ---- */
45    LDRH     r12,[r4],#4           /* control halfword for this step, then r4 += 4 */
46    CMP      r12,#0
47    BEQ      L0x160                /* both bytes zero: nothing to filter at this position */
/*
 * Load rows 0..7 (even rows via r0, odd rows via r10) and transpose.
 * After the last VTRN: d7,d6,d5,d4 = columns 0..3 (left of the position)
 * and d8,d9,d10,d11 = columns 4..7 (right of it); lane i is row i.
 */
48    ADD      r10,r0,r1             /* r10 -> row 1 */
49    VLD1.8   {d7},[r0],r11         /* row 0 */
50    VLD1.8   {d8},[r10],r11        /* row 1 */
51    VLD1.8   {d5},[r0],r11         /* row 2 */
52    VZIP.8   d7,d8
53    VLD1.8   {d10},[r10],r11       /* row 3 */
54    VLD1.8   {d6},[r0],r11         /* row 4 */
55    VZIP.8   d5,d10
56    VLD1.8   {d9},[r10],r11        /* row 5 */
57    VLD1.8   {d4},[r0],r11         /* row 6 */
58    VLD1.8   {d11},[r10],r11       /* row 7 */
59    VZIP.8   d6,d9
60    VZIP.16  d8,d10
61    VZIP.8   d4,d11
62    SUB      r0,r0,r1,LSL #3       /* rewind r0 by the 8 rows just read */
63    VZIP.16  d7,d5
64    VZIP.16  d9,d11
65    VZIP.16  d6,d4
66    VTRN.32  d8,d9
67    VTRN.32  d5,d4
68    VTRN.32  d10,d11
69    VTRN.32  d7,d6
/* Build the per-row masks; scalar TSTs are interleaved and feed the VMOVEQs. */
70    VABD.U8  d13,d4,d8             /* |col3 - col4|, the difference across the position */
71    VABD.U8  d12,d5,d4             /* |col2 - col3| */
72    VABD.U8  d18,d9,d8             /* |col5 - col4| */
73    VABD.U8  d19,d6,d4             /* |col1 - col3| */
74    TST      r12,#0xff             /* is the low control byte zero? */
75    VCGT.U8  d16,d0,d13            /* d16 = (d0 > |col3 - col4|) */
76    VMAX.U8  d12,d18,d12           /* max(|col5 - col4|, |col2 - col3|) */
77    VABD.U8  d17,d10,d8            /* |col6 - col4| */
78    VMOVEQ.32 d16[0],r6            /* low byte zero: clear the mask for rows 0..3 */
79    TST      r12,#0xff00           /* is the high control byte zero? */
80    VCGT.U8  d19,d2,d19            /* d19 = (d2 > |col1 - col3|) */
81    VCGT.U8  d12,d2,d12            /* d12 = (d2 > both inner differences) */
82    VMOVEQ.32 d16[1],r6            /* high byte zero: clear the mask for rows 4..7 */
83    VCGT.U8  d17,d2,d17            /* d17 = (d2 > |col6 - col4|) */
84    VAND     d16,d16,d12           /* d16 = combined mask */
85    TST      r12,#4                /* bit 2 of the control halfword selects the helper */
86    VAND     d12,d16,d17           /* d12 = d16 restricted by the col6 test */
87    VAND     d17,d16,d19           /* d17 = d16 restricted by the col1 test */
88    BNE      L0x17c                /* bit set: take the SGE4 path */
89    BL       armVCM4P10_DeblockingLumabSLT4_unsafe   /* external helper; register contract not visible here */
/*
 * Transpose back.  Columns 0..7 are taken from d7,d6,d30,d29,d24,d25,d10,d11;
 * d30,d29,d24,d25 are never written in this function, so they are presumably
 * the helper's outputs.  Afterwards rows 0..7 are in
 * d7,d24,d30,d10,d6,d25,d29,d11.
 */
90    VZIP.8   d7,d6
91    VZIP.8   d30,d29
92    VZIP.8   d24,d25
93    VZIP.8   d10,d11
94    VZIP.16  d7,d30
95    ADD      r10,r0,r1             /* r10 -> row 1 again */
96    VZIP.16  d24,d10
97    VZIP.16  d25,d11
98    VZIP.16  d6,d29
99    VTRN.32  d7,d24
100    VTRN.32  d30,d10
101    VTRN.32  d6,d25
102    VTRN.32  d29,d11
103    VST1.8   {d7},[r0],r11         /* row 0 */
104    VST1.8   {d24},[r10],r11       /* row 1 */
105    VST1.8   {d30},[r0],r11        /* row 2 */
106    VST1.8   {d10},[r10],r11       /* row 3 */
107    VST1.8   {d6},[r0],r11         /* row 4 */
108    VST1.8   {d25},[r10],r11       /* row 5 */
109    ADDS     r9,r9,r9              /* advance the step counter; C and Z drive the branches below */
110    VST1.8   {d29},[r0],r11        /* row 6 */
111    ADD      r5,r5,#2              /* NOTE(review): other paths add 4; presumably the SLT4 helper already advanced r5 by 2, confirm */
112    VST1.8   {d11},[r10],r1        /* row 7; the post-increment is irrelevant, r10 is recomputed before its next use */
113    SUB      r0,r0,r1,LSL #3       /* rewind r0 by the 8 rows just written */
114    VLD1.8   {d0[]},[r7]           /* d0 = second byte behind r2, for the following positions */
115    ADD      r0,r0,#4              /* next position, 4 bytes to the right */
116    VLD1.8   {d2[]},[r8]           /* d2 = second byte behind r3 */
117    BCC      L0x38                 /* no carry from the ADDS: more positions left in this band */
118    B        L0x1f0                /* carry: band finished */
119L0x160:                            /* ---- skip path: control halfword was zero ---- */
120    ADD      r0,r0,#4              /* next position */
121    ADDS     r9,r9,r9              /* advance the step counter */
122    VLD1.8   {d0[]},[r7]           /* d0 = second byte behind r2 */
123    ADD      r5,r5,#4              /* r5 is not consumed on this path, so skip the full 4 bytes */
124    VLD1.8   {d2[]},[r8]           /* d2 = second byte behind r3 */
125    BCC      L0x38                 /* more positions left in this band */
126    B        L0x1f0
127L0x17c:                            /* ---- path taken when bit 2 of the control halfword is set ---- */
128    BL       armVCM4P10_DeblockingLumabSGE4_unsafe   /* external helper; register contract not visible here */
/*
 * Transpose back.  Columns 0..7 are taken from d7,d31,d30,d29,d24,d25,d28,d11
 * (d31,d30,d29,d24,d25,d28 presumably written by the helper).  Afterwards
 * rows 0..7 are in d7,d24,d30,d28,d31,d25,d29,d11.
 */
129    VZIP.8   d7,d31
130    VZIP.8   d30,d29
131    VZIP.8   d24,d25
132    VZIP.8   d28,d11
133    VZIP.16  d7,d30
134    ADD      r10,r0,r1             /* r10 -> row 1 again */
135    VZIP.16  d24,d28
136    VZIP.16  d25,d11
137    VZIP.16  d31,d29
138    VTRN.32  d7,d24
139    VTRN.32  d30,d28
140    VTRN.32  d31,d25
141    VTRN.32  d29,d11
142    VST1.8   {d7},[r0],r11         /* row 0 */
143    VST1.8   {d24},[r10],r11       /* row 1 */
144    VST1.8   {d30},[r0],r11        /* row 2 */
145    VST1.8   {d28},[r10],r11       /* row 3 */
146    VST1.8   {d31},[r0],r11        /* row 4 */
147    VST1.8   {d25},[r10],r11       /* row 5 */
148    ADDS     r9,r9,r9              /* advance the step counter */
149    VST1.8   {d29},[r0],r11        /* row 6 */
150    ADD      r5,r5,#4              /* r5 advances by the full 4 bytes on this path */
151    VST1.8   {d11},[r10],r11       /* row 7 */
152    SUB      r0,r0,r1,LSL #3       /* rewind r0 by the 8 rows just written */
153    VLD1.8   {d0[]},[r7]           /* d0 = second byte behind r2 */
154    ADD      r0,r0,#4              /* next position */
155    VLD1.8   {d2[]},[r8]           /* d2 = second byte behind r3 */
156    BCC      L0x38                 /* more positions left in this band; otherwise fall through */
157L0x1f0:                            /* ---- end of a band of four positions ---- */
158    SUB      r4,r4,#0xe            /* 4 * 4 - 14: net +2, so the next band reads the interleaved halfwords */
159    SUB      r5,r5,#0xe            /* likewise net +2, given r5 advanced 4 per step */
160    SUB      r0,r0,#0x10           /* undo the four 4-byte steps to the right */
161    VLD1.8   {d0[]},[r2]           /* back to the first byte behind r2 for the band's first position */
162    ADD      r0,r0,r1,LSL #3       /* move down 8 rows */
163    VLD1.8   {d2[]},[r3]           /* back to the first byte behind r3 */
164    BNE      L0x38                 /* Z still comes from the last ADDS: clear after band 1, set after band 2 */
165    MOV      r0,#0                 /* return value 0 */
166    VPOP     {d8-d15}              /* restore the saved D registers */
167    POP      {r4-r12,pc}           /* restore core registers and return */
168
169    .end
170
171