omxVCM4P10_FilterDeblockingChroma_VerEdge_I_s.S revision 78e52bfac041d71ce53b5b13c2abf78af742b09d
1/*
2 * Copyright (C) 2007-2008 ARM Limited
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 *
16 */
17/*
18 *
19 */
20
/*
 * Build attributes 24 and 25 (Tag_ABI_align_needed and
 * Tag_ABI_align_preserved in the ARM EABI build-attributes addendum):
 * value 1 declares that this object needs, and preserves, 8-byte stack
 * alignment.
 */
21    .eabi_attribute 24, 1
22    .eabi_attribute 25, 1
23
/* Assemble as ARM (A32) instructions with the NEON extension enabled,
 * and place what follows in the code section. */
24    .arm
25    .fpu neon
26    .text
27
/*
 * omxVCM4P10_FilterDeblockingChroma_VerEdge_I
 *
 * In-place filter of two vertical edges, 4 bytes apart, over 8 rows of an
 * 8-bit plane.  The loop below runs exactly twice (once per edge) and the
 * function always returns 0 in r0.
 *
 * Arguments as this code uses them (names in quotes are the conventional
 * OpenMAX DL ones and are an assumption to be confirmed against the
 * public prototype):
 *   r0        address of the first byte to the right of edge 0, row 0
 *   r1        distance in bytes between consecutive rows
 *   r2        byte 0 is the threshold compared with |p0 - q0| for edge 0;
 *             byte 1 is loaded for edge 1 (presumably "pAlpha")
 *   r3        byte 0 is the threshold compared with the neighbour
 *             differences for edge 0; byte 1 is loaded for edge 1
 *             (presumably "pBeta")
 *   5th arg   first stack word on entry; kept in r5, which this file only
 *             advances (presumably "pThresholds", read by the helpers)
 *   6th arg   second stack word on entry; kept in r4; one 32-bit word of
 *             four strength bytes is read per edge, 8 bytes apart
 *             (presumably "pBS")
 *
 * Register roles inside the loop:
 *   r6   the four strength (bS) bytes of the current edge
 *   r7   loop counter (see the MOV below)
 *   r8   0x04040404, r9 = 0x03030303: bit masks applied to r6
 *   d0   first threshold replicated to all 8 byte lanes
 *   d2   second threshold replicated to all 8 byte lanes
 *   d1   16-bit lanes holding 4; d14 = bytes of 0; d15 = bytes of 1
 *        (d14/d15 are not read in this file; presumably helper inputs)
 *
 * Calls armVCM4P10_DeblockingChromabSLT4_unsafe and
 * armVCM4P10_DeblockingChromabSGE4_unsafe, which are defined elsewhere.
 * NOTE(review): this file consumes d29/d24 and d13/d31 after those calls,
 * so they are presumably the helpers' outputs; on the filtered path this
 * file neither reloads d0/d2 nor advances r5, so the helpers presumably do
 * that as well.  Confirm against the helper source.
 */
28    .global omxVCM4P10_FilterDeblockingChroma_VerEdge_I
29    .func   omxVCM4P10_FilterDeblockingChroma_VerEdge_I
30omxVCM4P10_FilterDeblockingChroma_VerEdge_I:
/* Save r4-r12,lr (40 bytes) and d8-d15 (64 bytes): 0x68 bytes in total,
 * so the caller's stack arguments now start at [sp,#0x68]. */
31    PUSH     {r4-r12,lr}
32    VPUSH    {d8-d15}
/* Load byte 0 at r2 / r3 into every lane of d0 / d2 and step each pointer
 * by one byte.  Move r0 four bytes left so that each 8-byte row load
 * straddles the edge (4 bytes on each side). */
33    VLD1.8   {d0[]},[r2]!
34    SUB      r0,r0,#4
35    VLD1.8   {d2[]},[r3]!
/* r4 = 6th argument, r5 = 5th argument (offsets include the 0x68 bytes
 * pushed above). */
36    LDR      r4,[sp,#0x6c]
37    LDR      r5,[sp,#0x68]
/* Masks tested against the packed bS word: 0x04 in every byte selects
 * bit 2, 0x03 in every byte selects bits 0-1. */
38    LDR      r8, =0x4040404
39    LDR      r9, =0x3030303
40    VMOV.I8  d14,#0
41    VMOV.I8  d15,#0x1
42    VMOV.I16 d1,#0x4
/* Loop counter: each ADDS r7,r7,r7 doubles it, giving 0x80000000 (Z clear,
 * loop again) and then 0 (Z set, exit), i.e. exactly two iterations. */
43    MOV      r7,#0x40000000
44L0x34:
/* Fetch the four bS bytes for this edge; the pointer moves on by 8 bytes. */
45    LDR      r6,[r4],#8
/* Load 8 rows of 8 bytes.  r0 walks the even rows and r10 the odd rows,
 * both with a step of two rows (lr = 2*r1).  Afterwards r0 has advanced by
 * 8 rows.  Row order: d7,d8,d5,d10,d6,d9,d4,d11. */
46    ADD      r10,r0,r1
47    ADD      lr,r1,r1
48    VLD1.8   {d7},[r0],lr
49    VLD1.8   {d8},[r10],lr
50    VLD1.8   {d5},[r0],lr
51    VLD1.8   {d10},[r10],lr
52    VLD1.8   {d6},[r0],lr
53    VLD1.8   {d9},[r10],lr
54    VLD1.8   {d4},[r0],lr
55    VLD1.8   {d11},[r10],lr
/* 8x8 byte transpose (zip bytes, zip halfwords, transpose words).  After
 * it each register holds one column, one byte lane per row:
 *   d7=col0 d6=col1 d5=col2 d4=col3 | d8=col4 d9=col5 d10=col6 d11=col7
 * The edge lies between col3 (p0, in d4) and col4 (q0, in d8); p1 = d5
 * and q1 = d9. */
56    VZIP.8   d7,d8
57    VZIP.8   d5,d10
58    VZIP.8   d6,d9
59    VZIP.8   d4,d11
60    VZIP.16  d7,d5
61    VZIP.16  d8,d10
62    VZIP.16  d6,d4
63    VZIP.16  d9,d11
64    VTRN.32  d7,d6
65    VTRN.32  d5,d4
66    VTRN.32  d10,d11
67    VTRN.32  d8,d9
/* If all four bS bytes are zero, skip this edge.  The two VABDs sit between
 * the CMP and the branch; NEON data-processing does not change the flags.
 * d19 = |col1 - p0|, d13 = |p0 - q0|. */
68    CMP      r6,#0
69    VABD.U8  d19,d6,d4
70    VABD.U8  d13,d4,d8
71    BEQ      L0x170
/* Build the per-lane filter-enable mask in d16:
 *   d16 = (d0 > |p0 - q0|) AND (bS > 0)
 *         AND (d2 > max(|p1 - p0|, |q1 - q0|))
 * The bS bytes are moved to d26 and widened to 16-bit lanes, so each bS
 * value covers two byte lanes, i.e. two rows.  d27 is the bS > 0 mask.
 * d12 = d16 AND (d2 > |col6 - q0|) and d17 = d16 AND (d2 > |col1 - p0|)
 * are not read again in this file (presumably helper inputs; confirm). */
72    VABD.U8  d12,d5,d4
73    VABD.U8  d18,d9,d8
74    VMOV.32  d26[0],r6
75    VCGT.U8  d16,d0,d13
76    VMAX.U8  d12,d18,d12
77    VMOVL.U8 q13,d26
78    VABD.U8  d17,d10,d8
79    VCGT.S16 d27,d26,#0
80    VCGT.U8  d12,d2,d12
81    VCGT.U8  d19,d2,d19
82    VAND     d16,d16,d27
/* Flags from this TST are consumed by the BLNE below: the helper is called
 * when any bS byte has bit 0 or bit 1 set. */
83    TST      r6,r9
84    VCGT.U8  d17,d2,d17
85    VAND     d16,d16,d12
86    VAND     d12,d16,d17
87    VAND     d17,d16,d19
88    BLNE     armVCM4P10_DeblockingChromabSLT4_unsafe
/* Second helper is called when any bS byte has bit 2 set.  Between the TST
 * and the BLNE: rewind r0 by the 8 rows consumed by the loads, and turn d26
 * into a 16-bit lane mask that is all ones where (bS AND 4) is non-zero. */
89    TST      r6,r8
90    SUB      r0,r0,r1,LSL #3
91    VTST.16  d26,d26,d1
92    BLNE     armVCM4P10_DeblockingChromabSGE4_unsafe
/* Select the output per lane:
 *   where d26 is set, d29 takes d13 and d24 takes d31;
 *   where d16 (filter enable) is clear, the original p0 (d4) and
 *   q0 (d8) are restored.
 * r10/r12 address column 3 (p0) on even/odd rows.  lr is recomputed because
 * the BL instructions above overwrite it. */
93    VBIT     d29,d13,d26
94    VBIT     d24,d31,d26
95    ADD      r10,r0,#3
96    VBIF     d29,d4,d16
97    ADD      r12,r10,r1
98    ADD      lr,r1,r1
99    VBIF     d24,d8,d16
/* Step the loop counter now; the resulting Z flag is consumed by the BNE
 * after the stores (the stores and the plain ADDs leave the flags alone). */
100    ADDS     r7,r7,r7
/* Write the new p0 column back, one byte per row. */
101    VST1.8   {d29[0]},[r10],lr
102    VST1.8   {d29[1]},[r12],lr
103    VST1.8   {d29[2]},[r10],lr
104    VST1.8   {d29[3]},[r12],lr
105    VST1.8   {d29[4]},[r10],lr
106    VST1.8   {d29[5]},[r12],lr
107    VST1.8   {d29[6]},[r10],lr
108    VST1.8   {d29[7]},[r12],lr
/* Write the new q0 column (column 4) back; here r12 walks the even rows and
 * r10 the odd rows. */
109    ADD      r12,r0,#4
110    ADD      r10,r12,r1
111    VST1.8   {d24[0]},[r12],lr
112    VST1.8   {d24[1]},[r10],lr
113    VST1.8   {d24[2]},[r12],lr
114    VST1.8   {d24[3]},[r10],lr
115    VST1.8   {d24[4]},[r12],lr
116    VST1.8   {d24[5]},[r10],lr
117    VST1.8   {d24[6]},[r12],lr
118    VST1.8   {d24[7]},[r10],lr
/* Move 4 bytes right to the next edge and loop while r7 is non-zero. */
119    ADD      r0,r0,#4
120    BNE      L0x34
/* Return 0 and restore the saved registers. */
121    MOV      r0,#0
122    VPOP     {d8-d15}
123    POP      {r4-r12,pc}
/* bS word was zero: nothing to filter for this edge.  Reload d0/d2 from the
 * current r2/r3 (already stepped past byte 0 at entry, no writeback here),
 * rewind r0 by 8 rows and move it 4 bytes right, skip 4 bytes at r5, then
 * step the loop counter and loop or return exactly as above. */
124L0x170:
125    VLD1.8   {d0[]},[r2]
126    ADD      r0,r0,#4
127    SUB      r0,r0,r1,LSL #3
128    ADDS     r7,r7,r7
129    VLD1.8   {d2[]},[r3]
130    ADD      r5,r5,#4
131    BNE      L0x34
132    MOV      r0,#0
133    VPOP     {d8-d15}
134    POP      {r4-r12,pc}
135    .endfunc
136
/* End of the assembly source. */
137    .end
138
139