omxVCM4P10_FilterDeblockingChroma_VerEdge_I_s.S revision 7ea582e1dbdd9a88b2105fbe29ed0ec92cbf70c6
1/*
2 * Copyright (C) 2007-2008 ARM Limited
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 *
16 */
17/*
18 *
19 */
20
/* Build attributes, instruction set, FPU selection and symbol export. */
21    .eabi_attribute 24, 1   /* Tag_ABI_align_needed: code relies on 8-byte stack alignment */
22    .eabi_attribute 25, 1   /* Tag_ABI_align_preserved: code keeps 8-byte stack alignment */
23
24    .arm                    /* assemble as ARM (A32) instructions, not Thumb */
25    .fpu neon               /* allow the NEON (Advanced SIMD) mnemonics used below */
26    .text                   /* place what follows in the code section */
27
28    .global omxVCM4P10_FilterDeblockingChroma_VerEdge_I   /* export the entry point */
/*
 * omxVCM4P10_FilterDeblockingChroma_VerEdge_I
 *
 * Processes two 8-row x 8-byte tiles in place. For each tile the rows are
 * loaded, transposed so that every D register holds one byte column, two
 * external helper routines are conditionally called, and byte columns 3 and 4
 * of the tile are written back. The second tile starts 4 bytes to the right
 * of the first.
 *
 * Inputs as read by the code below:
 *   r0            byte pointer; each row load starts at r0 - 4
 *   r1            distance in bytes between consecutive rows
 *   r2            pointer to a byte replicated into d0 (compared against
 *                 |col3 - col4|)
 *   r3            pointer to a byte replicated into d2 (compared against the
 *                 remaining column differences)
 *   entry [sp]    first stack word, kept in r5
 *   entry [sp,#4] second stack word, kept in r4; one 32-bit word is read from
 *                 it per tile, advancing 8 bytes each time
 * NOTE(review): in the OpenMAX DL API these are presumably pSrcDst,
 * srcdstStep, pAlpha, pBeta, pThresholds and pBS - confirm against the header.
 *
 * Returns: r0 = 0 on every exit path.
 * Preserves: r4-r12 and d8-d15 (saved and restored here).
 *
 * Calls armVCM4P10_DeblockingChromabSLT4_unsafe and
 * armVCM4P10_DeblockingChromabSGE4_unsafe, which are defined elsewhere; their
 * register contract is not visible in this file.
 *
 * Column-to-register mapping after the transpose (column 0 is the byte at the
 * adjusted r0, i.e. 4 bytes before the incoming pointer):
 *   col0=d7  col1=d6  col2=d5  col3=d4  col4=d8  col5=d9  col6=d10  col7=d11
 */
29omxVCM4P10_FilterDeblockingChroma_VerEdge_I:
30    PUSH     {r4-r12,lr}              /* save 10 core registers (0x28 bytes) */
31    VPUSH    {d8-d15}                 /* save 8 D registers (0x40 bytes); sp is now entry sp - 0x68 */
32    VLD1.8   {d0[]},[r2]!             /* d0 = byte at [r2] replicated to all lanes; r2 += 1 */
33    SUB      r0,r0,#4                 /* start row loads 4 bytes before the incoming pointer */
34    VLD1.8   {d2[]},[r3]!             /* d2 = byte at [r3] replicated to all lanes; r3 += 1 */
35    LDR      r4,[sp,#0x6c]            /* r4 = second stack word (entry sp + 4) */
36    LDR      r5,[sp,#0x68]            /* r5 = first stack word (entry sp); not read in this routine, */
                                        /* presumably consumed by the helpers - confirm */
37    LDR      r8, =0x4040404           /* mask: bit 2 of every byte of r6 */
38    LDR      r9, =0x3030303           /* mask: bits 0-1 of every byte of r6 */
39    VMOV.I8  d14,#0                   /* d14 = 0 in every byte; not referenced again here, */
40    VMOV.I8  d15,#0x1                 /* d15 = 1 in every byte; presumably helper constants - confirm */
41    VMOV.I16 d1,#0x4                  /* d1 = 4 in every 16-bit lane, used by VTST below */
42    MOV      r7,#0x40000000           /* loop counter: ADDS r7,r7,r7 gives 0x80000000 (non-zero), */
                                        /* then 0, so the loop body runs exactly twice */
43L0x34:
44    LDR      r6,[r4],#8               /* r6 = four control bytes for this tile; r4 += 8 */
45    ADD      r10,r0,r1                /* r10 = address of row 1 */
46    ADD      lr,r1,r1                 /* lr = 2 * row stride (lr is free scratch here) */
      /* Load 8 rows of 8 bytes: even rows through r0, odd rows through r10. */
47    VLD1.8   {d7},[r0],lr             /* row 0 */
48    VLD1.8   {d8},[r10],lr            /* row 1 */
49    VLD1.8   {d5},[r0],lr             /* row 2 */
50    VLD1.8   {d10},[r10],lr           /* row 3 */
51    VLD1.8   {d6},[r0],lr             /* row 4 */
52    VLD1.8   {d9},[r10],lr            /* row 5 */
53    VLD1.8   {d4},[r0],lr             /* row 6 */
54    VLD1.8   {d11},[r10],lr           /* row 7; r0 has now advanced by 8 rows */
      /* 8x8 byte transpose: zip bytes of row pairs, zip halfwords, swap words. */
55    VZIP.8   d7,d8
56    VZIP.8   d5,d10
57    VZIP.8   d6,d9
58    VZIP.8   d4,d11
59    VZIP.16  d7,d5
60    VZIP.16  d8,d10
61    VZIP.16  d6,d4
62    VZIP.16  d9,d11
63    VTRN.32  d7,d6                    /* d7 = col0, d6 = col1 */
64    VTRN.32  d5,d4                    /* d5 = col2, d4 = col3 */
65    VTRN.32  d10,d11                  /* d10 = col6, d11 = col7 */
66    VTRN.32  d8,d9                    /* d8 = col4, d9 = col5 */
67    CMP      r6,#0                    /* are all four control bytes zero? (NEON ops below keep the flags) */
68    VABD.U8  d19,d6,d4                /* d19 = |col1 - col3| */
69    VABD.U8  d13,d4,d8                /* d13 = |col3 - col4| */
70    BEQ      L0x170                   /* control word is zero: skip this tile without storing */
71    VABD.U8  d12,d5,d4                /* d12 = |col2 - col3| */
72    VABD.U8  d18,d9,d8                /* d18 = |col5 - col4| */
73    VMOV.32  d26[0],r6                /* low word of d26 = the four control bytes */
74    VCGT.U8  d16,d0,d13               /* d16 = all-ones where |col3 - col4| < d0 */
75    VMAX.U8  d12,d18,d12              /* d12 = max(|col2 - col3|, |col5 - col4|) */
76    VMOVL.U8 q13,d26                  /* widen to 16 bits: d26 = control bytes, one per 16-bit lane */
                                        /* (each control byte therefore covers two rows) */
77    VABD.U8  d17,d10,d8               /* d17 = |col6 - col4| */
78    VCGT.S16 d27,d26,#0               /* d27 = all-ones where the control byte is > 0 */
79    VCGT.U8  d12,d2,d12               /* d12 = all-ones where max(...) < d2 */
80    VCGT.U8  d19,d2,d19               /* d19 = all-ones where |col1 - col3| < d2 */
81    VAND     d16,d16,d27              /* combine with the non-zero control-byte mask */
82    TST      r6,r9                    /* any control byte with bit 0 or bit 1 set? */
83    VCGT.U8  d17,d2,d17               /* d17 = all-ones where |col6 - col4| < d2 */
84    VAND     d16,d16,d12              /* d16 = final per-byte mask used by VBIF below */
85    VAND     d12,d16,d17              /* d12, d17: extra masks, not read again in this routine; */
86    VAND     d17,d16,d19              /* presumably inputs to the helpers - confirm */
87    BLNE     armVCM4P10_DeblockingChromabSLT4_unsafe   /* called if TST r6,r9 was non-zero; overwrites lr */
88    TST      r6,r8                    /* any control byte with bit 2 set? */
89    SUB      r0,r0,r1,LSL #3          /* rewind r0 by 8 rows, back to row 0 */
90    VTST.16  d26,d26,d1               /* d26 = all-ones in lanes whose control byte has bit 2 set */
91    BLNE     armVCM4P10_DeblockingChromabSGE4_unsafe   /* called if TST r6,r8 was non-zero; overwrites lr */
      /* NOTE(review): d29, d24, d13 and d31 are expected to hold helper results here - confirm. */
92    VBIT     d29,d13,d26              /* where d26 is set, take bits of d13 into d29 */
93    VBIT     d24,d31,d26              /* where d26 is set, take bits of d31 into d24 */
94    ADD      r10,r0,#3                /* r10 = address of column 3, row 0 */
95    VBIF     d29,d4,d16               /* where d16 is clear, keep the loaded col3 bytes */
96    ADD      r12,r10,r1               /* r12 = address of column 3, row 1 */
97    ADD      lr,r1,r1                 /* lr = 2 * row stride again (BL overwrote it) */
98    VBIF     d24,d8,d16               /* where d16 is clear, keep the loaded col4 bytes */
99    ADDS     r7,r7,r7                 /* step loop counter; flags are consumed by the BNE after the */
                                        /* stores (nothing in between alters them) */
      /* Write d29 back as column 3: even rows via r10, odd rows via r12. */
100    VST1.8   {d29[0]},[r10],lr
101    VST1.8   {d29[1]},[r12],lr
102    VST1.8   {d29[2]},[r10],lr
103    VST1.8   {d29[3]},[r12],lr
104    VST1.8   {d29[4]},[r10],lr
105    VST1.8   {d29[5]},[r12],lr
106    VST1.8   {d29[6]},[r10],lr
107    VST1.8   {d29[7]},[r12],lr
108    ADD      r12,r0,#4                /* r12 = address of column 4, row 0 */
109    ADD      r10,r12,r1               /* r10 = address of column 4, row 1 */
      /* Write d24 back as column 4: even rows via r12, odd rows via r10. */
110    VST1.8   {d24[0]},[r12],lr
111    VST1.8   {d24[1]},[r10],lr
112    VST1.8   {d24[2]},[r12],lr
113    VST1.8   {d24[3]},[r10],lr
114    VST1.8   {d24[4]},[r12],lr
115    VST1.8   {d24[5]},[r10],lr
116    VST1.8   {d24[6]},[r12],lr
117    VST1.8   {d24[7]},[r10],lr
118    ADD      r0,r0,#4                 /* move 4 bytes right to the next tile */
119    BNE      L0x34                    /* second pass if the counter is still non-zero */
120    MOV      r0,#0                    /* return value 0 */
121    VPOP     {d8-d15}                 /* restore callee-saved NEON registers */
122    POP      {r4-r12,pc}              /* restore core registers and return */
      /* Skip path: control word was zero, nothing is stored for this tile. */
123L0x170:
124    VLD1.8   {d0[]},[r2]              /* reload d0 from [r2]; r2 was advanced by 1 at entry, no writeback here */
125    ADD      r0,r0,#4                 /* move 4 bytes right to the next tile ... */
126    SUB      r0,r0,r1,LSL #3          /* ... and rewind the 8 rows consumed by the loads */
127    ADDS     r7,r7,r7                 /* step loop counter; flags are consumed by the BNE below */
128    VLD1.8   {d2[]},[r3]              /* reload d2 from [r3] (also advanced by 1 at entry) */
129    ADD      r5,r5,#4                 /* advance r5 by 4 bytes; presumably keeps it in step with what */
                                         /* the helpers would have consumed - confirm */
130    BNE      L0x34                    /* second pass if the counter is still non-zero */
131    MOV      r0,#0                    /* return value 0 */
132    VPOP     {d8-d15}                 /* restore callee-saved NEON registers */
133    POP      {r4-r12,pc}              /* restore core registers and return */
134
135    .end                    /* end of assembly; the assembler ignores anything after this */
136
137