omxVCM4P10_FilterDeblockingLuma_HorEdge_I_s.S revision 7ea582e1dbdd9a88b2105fbe29ed0ec92cbf70c6
1/*
2 * Copyright (C) 2007-2008 ARM Limited
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 *
16 */
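17/*
18 * ARM NEON implementation of omxVCM4P10_FilterDeblockingLuma_HorEdge_I; it calls the external helpers armVCM4P10_DeblockingLumabSLT4_unsafe and armVCM4P10_DeblockingLumabSGE4_unsafe.
19 */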
20
/* Build attributes and assembler mode for this object file. */
21    .eabi_attribute 24, 1    /* Tag_ABI_align_needed = 1 per the ARM EABI build-attribute numbering */
22    .eabi_attribute 25, 1    /* Tag_ABI_align_preserved = 1 per the ARM EABI build-attribute numbering */
23
24    .arm                     /* assemble as ARM (A32) instructions, not Thumb */
25    .fpu neon                /* enable the NEON (Advanced SIMD) mnemonics used below */
26    .text                    /* everything that follows goes into the code section */
27
/*
 * omxVCM4P10_FilterDeblockingLuma_HorEdge_I
 *
 * Filters pixel rows around horizontal edges, 8 bytes (one D register) at a
 * time.  The routine makes eight passes: two 8-byte columns for each of four
 * edges spaced 4 rows apart.
 *
 * Inputs as used by the code (argument names are NOT visible in this file;
 * presumably pSrcDst, srcdstStep, pAlpha, pBeta, pThresholds, pBS as in the
 * OpenMAX DL specification -- confirm against the header):
 *   r0        pointer to the first row below the first edge; the code backs
 *             it up by 4 rows before use
 *   r1        distance in bytes between consecutive rows
 *   r2        pointer to 2 bytes: [0] is broadcast into d0 for the first edge,
 *             [1] for the following edges
 *   r3        pointer to 2 bytes: [0] is broadcast into d2 for the first edge,
 *             [1] for the following edges
 *   [sp+0]    (at entry) loaded into r5; advanced by 2 here on the passes that
 *             do not call the bS<4 helper
 *   [sp+4]    (at entry) loaded into r4; read one halfword (two bytes) per pass
 *
 * Returns r0 = 0.
 *
 * Register roles inside the loop:
 *   r6   second row pointer (r0 + r1) so rows can be loaded alternately
 *   r7   r2 + 1, r8 = r3 + 1 (second-edge comparison values)
 *   r9   pass counter encoded as a bit pattern, see the ADDS comments
 *   r10  2 * r1
 *   r11  0, used to clear mask lanes
 *   r12  the two per-pass control bytes loaded from [r4]
 *   d14  all bytes 0, d15 = all bytes 1; not used in this routine itself,
 *        presumably constants expected by the two helpers -- confirm
 *
 * Rows are numbered 0..7 from r0 at the top of a pass; the edge lies between
 * row 3 and row 4.
 *
 * Saves and restores r4-r12, lr and d8-d15.  Calls the external helpers
 * armVCM4P10_DeblockingLumabSLT4_unsafe / armVCM4P10_DeblockingLumabSGE4_unsafe,
 * whose register contract is not visible here.
 */
28    .global omxVCM4P10_FilterDeblockingLuma_HorEdge_I
29omxVCM4P10_FilterDeblockingLuma_HorEdge_I:
30    PUSH     {r4-r12,lr}               /* 10 core registers = 40 bytes */
31    VPUSH    {d8-d15}                  /* 8 D registers = 64 bytes; total saved = 104 = 0x68 */
32    ADD      r7,r2,#1                  /* r7 -> second byte of the r2 array */
33    ADD      r8,r3,#1                  /* r8 -> second byte of the r3 array */
34    VLD1.8   {d0[]},[r2]               /* broadcast byte [r2] to all 8 lanes of d0 */
35    SUB      r0,r0,r1,LSL #2           /* r0 -= 4 rows: start at row 0 above the edge */
36    VLD1.8   {d2[]},[r3]               /* broadcast byte [r3] to all 8 lanes of d2 */
37    LDR      r4,[sp,#0x6c]             /* word at entry-sp + 4 (second stack argument) */
38    LDR      r5,[sp,#0x68]             /* word at entry-sp + 0 (first stack argument) */
39    MOV      r11,#0                    /* zero source for the VMOVEQ lane clears */
40    VMOV.I8  d14,#0                    /* constant 0 in every byte */
41    VMOV.I8  d15,#0x1                  /* constant 1 in every byte */
42    ADD      r10,r1,r1                 /* r10 = 2 rows */
43    MOV      r9,#0x55000000            /* top byte 01010101b: doubling it gives carry on every
                                            2nd ADDS and zero after the 8th */
/* ---- one pass: one 8-byte column of one edge ---- */
44L0x38:
45    LDRH     r12,[r4],#2               /* two control bytes for this column; r4 += 2 */
46    ADD      r6,r0,r1                  /* r6 -> row 1 */
47    CMP      r12,#0
48    BEQ      L0xe4                     /* both control bytes zero: nothing to filter */
49    VLD1.8   {d7},[r0],r10             /* d7  = row 0; r0 -> row 2 */
50    VLD1.8   {d6},[r6],r10             /* d6  = row 1; r6 -> row 3 */
51    VLD1.8   {d5},[r0],r10             /* d5  = row 2; r0 -> row 4 */
52    VLD1.8   {d4},[r6],r10             /* d4  = row 3; r6 -> row 5 */
53    VLD1.8   {d8},[r0],r10             /* d8  = row 4; r0 -> row 6 */
54    VABD.U8  d12,d4,d5                 /* |row3 - row2| */
55    VLD1.8   {d9},[r6]                 /* d9  = row 5 */
56    VABD.U8  d13,d8,d4                 /* |row4 - row3| (difference across the edge) */
57    VLD1.8   {d10},[r0],r1             /* d10 = row 6; r0 -> row 7 */
58    VABD.U8  d18,d9,d8                 /* |row5 - row4| */
59    VABD.U8  d19,d6,d4                 /* |row1 - row3| */
60    VCGT.U8  d16,d0,d13                /* mask: d0 > |row4 - row3| */
61    TST      r12,#0xff                 /* low control byte == 0 ? */
62    VMAX.U8  d12,d18,d12               /* max(|row5 - row4|, |row3 - row2|) */
63    VABD.U8  d17,d10,d8                /* |row6 - row4| */
64    VMOVEQ.32 d16[0],r11               /* low control byte zero: clear mask for bytes 0-3 */
65    TST      r12,#0xff00               /* high control byte == 0 ? */
66    VCGT.U8  d19,d2,d19                /* d2 > |row1 - row3| */
67    VCGT.U8  d12,d2,d12                /* d2 > both inner-row differences */
68    VMOVEQ.32 d16[1],r11               /* high control byte zero: clear mask for bytes 4-7 */
69    VCGT.U8  d17,d2,d17                /* d2 > |row6 - row4| */
70    VLD1.8   {d11},[r0]                /* d11 = row 7 (r0 stays at row 7) */
71    VAND     d16,d16,d12               /* d16 = combined per-byte filter mask */
72    TST      r12,#4                    /* bit 2 of the low control byte selects the path */
73    VAND     d12,d16,d17               /* filter mask AND (d2 > |row6 - row4|) */
74    VAND     d17,d16,d19               /* filter mask AND (d2 > |row1 - row3|) */
75    BNE      L0xf8                     /* bit 2 set: take the SGE4 path */
/* ---- bit 2 clear: SLT4 helper, rows 2..5 are rewritten ---- */
76    SUB      r0,r0,r1,LSL #2           /* row 7 -> row 3 */
77    SUB      r0,r0,r1                  /* row 3 -> row 2 */
78    BL       armVCM4P10_DeblockingLumabSLT4_unsafe /* external; results are taken from d30,d29,d24,d25
                                            below, r0 is assumed preserved, and the helper presumably
                                            advances r5 itself -- confirm */
79    VST1.8   {d30},[r0],r1             /* row 2 */
80    VST1.8   {d29},[r0],r1             /* row 3 */
81    SUB      r6,r0,r1,LSL #2           /* r0 is at row 4 here, so r6 = row 0 of this column */
82    VST1.8   {d24},[r0],r1             /* row 4 */
83    ADDS     r9,r9,r9                  /* step pass counter: C = column pair done, Z = all done */
84    VST1.8   {d25},[r0]                /* row 5 */
85    ADD      r0,r6,#8                  /* next column: row 0, 8 bytes to the right */
86    BCC      L0x38                     /* carry clear: second column of this edge still to do */
87    B        L0x130
/* ---- both control bytes zero: skip this column ---- */
88L0xe4:
89    ADD      r0,r0,#8                  /* r0 was not moved; step 8 bytes to the right */
90    ADDS     r9,r9,r9                  /* step pass counter (same flag meaning as above) */
91    ADD      r5,r5,#2                  /* step r5 past this column's two bytes */
92    BCC      L0x38
93    B        L0x130
/* ---- bit 2 set: SGE4 helper, rows 1..6 are rewritten ---- */
94L0xf8:
95    SUB      r0,r0,r1,LSL #2           /* row 7 -> row 3 */
96    SUB      r0,r0,r1,LSL #1           /* row 3 -> row 1 */
97    BL       armVCM4P10_DeblockingLumabSGE4_unsafe /* external; results are taken from d31,d30,d29,
                                            d24,d25,d28 below, r0 is assumed preserved -- confirm */
98    VST1.8   {d31},[r0],r1             /* row 1 */
99    VST1.8   {d30},[r0],r1             /* row 2 */
100    VST1.8   {d29},[r0],r1            /* row 3 */
101    SUB      r6,r0,r1,LSL #2          /* r0 is at row 4 here, so r6 = row 0 of this column */
102    VST1.8   {d24},[r0],r1            /* row 4 */
103    ADDS     r9,r9,r9                 /* step pass counter (same flag meaning as above) */
104    VST1.8   {d25},[r0],r1            /* row 5 */
105    ADD      r5,r5,#2                 /* step r5 past this column's two bytes */
106    VST1.8   {d28},[r0]               /* row 6 */
107    ADD      r0,r6,#8                 /* next column: row 0, 8 bytes to the right */
108    BCC      L0x38
/* ---- both columns of an edge done: move to the next edge ---- */
109L0x130:
110    SUB      r0,r0,#0x10              /* undo the two 8-byte column steps */
111    VLD1.8   {d0[]},[r7]              /* d0 = second byte of the r2 array */
112    ADD      r0,r0,r1,LSL #2          /* move down 4 rows to the next edge */
113    VLD1.8   {d2[]},[r8]              /* d2 = second byte of the r3 array */
114    BNE      L0x38                    /* Z still comes from the last ADDS: loop until r9 == 0 */
115    MOV      r0,#0                    /* return value 0 */
116    VPOP     {d8-d15}
117    POP      {r4-r12,pc}              /* restore and return by loading pc */
118
119    .end
120
121