omxVCM4P10_FilterDeblockingLuma_HorEdge_I_s.S revision 78e52bfac041d71ce53b5b13c2abf78af742b09d
1/*
2 * Copyright (C) 2007-2008 ARM Limited
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 *
16 */
17/*
18 *
19 */
20
21    .eabi_attribute 24, 1   /* build attribute 24 (Tag_ABI_align_needed) = 1: code relies on 8-byte alignment of 8-byte data */
22    .eabi_attribute 25, 1   /* build attribute 25 (Tag_ABI_align_preserved) = 1: code preserves 8-byte alignment */
23
24    .arm                    /* assemble as ARM (32-bit wide) instructions, not Thumb */
25    .fpu neon               /* allow the NEON (Advanced SIMD) mnemonics used below */
26    .text                   /* everything that follows goes into the code section */
27
/*
 * omxVCM4P10_FilterDeblockingLuma_HorEdge_I
 *
 * Filters four horizontal edges of a 16-pixel-wide block in place. The
 * first edge lies between the row just above [r0] and the row at [r0];
 * each following edge is 4 rows (4 * r1 bytes) further down. Every edge
 * is processed as two 8-pixel-wide halves, so the inner loop runs 8 times.
 *
 * Arguments as the code consumes them (names presumably pSrcDst,
 * srcdstStep, pAlpha, pBeta, pThresholds, pBS per the OpenMAX DL
 * prototype - confirm against the header):
 *   r0        pointer to the first row below the first edge; read and written
 *   r1        byte distance between consecutive rows
 *   r2        2 bytes: [0] is used for the first edge, [1] for the other three
 *   r3        2 bytes: [0] is used for the first edge, [1] for the other three
 *   [sp,#0]   pointer kept in r5; advanced by 2 per 8-pixel half here or,
 *             presumably, inside the bSLT4 helper (TODO confirm)
 *   [sp,#4]   pointer kept in r4; one halfword (2 bytes, one byte per
 *             4 pixels) is read per 8-pixel half, 16 bytes in total
 *
 * Returns r0 = 0. r4-r12, lr and d8-d15 are saved and restored.
 *
 * Rows held in registers inside the loop (edge is between d4 and d8):
 *   d7 d6 d5 d4 | d8 d9 d10 d11   = rows -4,-3,-2,-1 | 0,+1,+2,+3
 *
 * Loop control: r9 starts as 0x55000000 and is doubled with ADDS once per
 * 8-pixel half. Carry is clear after the first half of an edge and set
 * after the second; the result becomes zero (Z set) after the 8th
 * doubling, which ends the function. No flag-setting instruction sits
 * between the ADDS and the BCC / BNE that use its flags.
 */
28    .global omxVCM4P10_FilterDeblockingLuma_HorEdge_I
29    .func   omxVCM4P10_FilterDeblockingLuma_HorEdge_I
30omxVCM4P10_FilterDeblockingLuma_HorEdge_I:
31    PUSH     {r4-r12,lr}              /* 10 core registers = 40 bytes */
32    VPUSH    {d8-d15}                 /* 8 D registers = 64 bytes; stack args now at sp+0x68 */
33    ADD      r7,r2,#1                 /* r7 -> second byte of the r2 array (used from the 2nd edge on) */
34    ADD      r8,r3,#1                 /* r8 -> second byte of the r3 array (used from the 2nd edge on) */
35    VLD1.8   {d0[]},[r2]              /* d0 = first r2 byte replicated to all 8 lanes */
36    SUB      r0,r0,r1,LSL #2          /* r0 -> row -4 (four rows above the edge) */
37    VLD1.8   {d2[]},[r3]              /* d2 = first r3 byte replicated to all 8 lanes */
38    LDR      r4,[sp,#0x6c]            /* r4 = second stack argument (0x68 + 4) */
39    LDR      r5,[sp,#0x68]            /* r5 = first stack argument (40 + 64 bytes above sp) */
40    MOV      r11,#0                   /* zero source for clearing mask lanes below */
41    VMOV.I8  d14,#0                   /* not read in this file; presumably a constant for the helpers (TODO confirm) */
42    VMOV.I8  d15,#0x1                 /* not read in this file; presumably a constant for the helpers (TODO confirm) */
43    ADD      r10,r1,r1                /* r10 = 2 * row step, lets r0 and r6 walk alternate rows */
44    MOV      r9,#0x55000000           /* bit-pattern loop counter, see header */
45L0x38:
46    LDRH     r12,[r4],#2              /* r12 = two per-4-pixel bytes for this 8-pixel half; r4 += 2 */
47    ADD      r6,r0,r1                 /* r6 -> row -3 */
48    CMP      r12,#0
49    BEQ      L0xe4                    /* both bytes zero: nothing to filter in this half */
50    VLD1.8   {d7},[r0],r10            /* row -4 */
51    VLD1.8   {d6},[r6],r10            /* row -3 */
52    VLD1.8   {d5},[r0],r10            /* row -2 */
53    VLD1.8   {d4},[r6],r10            /* row -1 */
54    VLD1.8   {d8},[r0],r10            /* row  0 */
55    VABD.U8  d12,d4,d5                /* |row-1 - row-2| */
56    VLD1.8   {d9},[r6]                /* row +1 */
57    VABD.U8  d13,d8,d4                /* |row0 - row-1|, the step across the edge */
58    VLD1.8   {d10},[r0],r1            /* row +2; r0 -> row +3 */
59    VABD.U8  d18,d9,d8                /* |row+1 - row0| */
60    VABD.U8  d19,d6,d4                /* |row-3 - row-1| */
61    VCGT.U8  d16,d0,d13               /* mask = d0 > step across the edge */
62    TST      r12,#0xff                /* Z set if the low byte (pixels 0-3) is zero */
63    VMAX.U8  d12,d18,d12              /* larger of the two near-edge gradients */
64    VABD.U8  d17,d10,d8               /* |row+2 - row0| */
65    VMOVEQ.32 d16[0],r11              /* low byte zero: clear the mask for pixels 0-3 */
66    TST      r12,#0xff00              /* Z set if the high byte (pixels 4-7) is zero */
67    VCGT.U8  d19,d2,d19               /* d2 > |row-3 - row-1| */
68    VCGT.U8  d12,d2,d12               /* d2 > max near-edge gradient */
69    VMOVEQ.32 d16[1],r11              /* high byte zero: clear the mask for pixels 4-7 */
70    VCGT.U8  d17,d2,d17               /* d2 > |row+2 - row0| */
71    VLD1.8   {d11},[r0]               /* row +3; r0 stays on row +3 */
72    VAND     d16,d16,d12              /* d16 = combined per-pixel filter mask */
73    TST      r12,#4                   /* bit 2 of the low byte selects the helper; flags live until BNE */
74    VAND     d12,d16,d17              /* mask AND (d2 > |row+2 - row0|) */
75    VAND     d17,d16,d19              /* mask AND (d2 > |row-3 - row-1|) */
76    BNE      L0xf8                    /* bit 2 set: take the bSGE4 path */
77    SUB      r0,r0,r1,LSL #2
78    SUB      r0,r0,r1                 /* r0 -> row -2 (back 5 rows from row +3) */
79    BL       armVCM4P10_DeblockingLumabSLT4_unsafe  /* external helper; results are taken from d30,d29,d24,d25 below */
80    VST1.8   {d30},[r0],r1            /* write row -2 */
81    VST1.8   {d29},[r0],r1            /* write row -1 */
82    SUB      r6,r0,r1,LSL #2          /* r6 -> row -4 of this half */
83    VST1.8   {d24},[r0],r1            /* write row 0 */
84    ADDS     r9,r9,r9                 /* step loop counter; C and Z are consumed by BCC / BNE */
85    VST1.8   {d25},[r0]               /* write row +1 */
86    ADD      r0,r6,#8                 /* r0 -> row -4 of the next 8-pixel half */
87    BCC      L0x38                    /* carry clear: second half of this edge still to do */
88    B        L0x130                   /* carry set: edge finished */
89L0xe4:
                                        /* skip path: no pixel loads or stores for this half */
90    ADD      r0,r0,#8                 /* advance 8 pixels to the right */
91    ADDS     r9,r9,r9                 /* step loop counter */
92    ADD      r5,r5,#2                 /* skip the 2 r5 bytes belonging to this half */
93    BCC      L0x38
94    B        L0x130
95L0xf8:
96    SUB      r0,r0,r1,LSL #2
97    SUB      r0,r0,r1,LSL #1          /* r0 -> row -3 (back 6 rows from row +3) */
98    BL       armVCM4P10_DeblockingLumabSGE4_unsafe  /* external helper; results are taken from d31,d30,d29,d24,d25,d28 below */
99    VST1.8   {d31},[r0],r1            /* write row -3 */
100    VST1.8   {d30},[r0],r1           /* write row -2 */
101    VST1.8   {d29},[r0],r1           /* write row -1 */
102    SUB      r6,r0,r1,LSL #2         /* r6 -> row -4 of this half */
103    VST1.8   {d24},[r0],r1           /* write row 0 */
104    ADDS     r9,r9,r9                /* step loop counter */
105    VST1.8   {d25},[r0],r1           /* write row +1 */
106    ADD      r5,r5,#2                /* skip the 2 r5 bytes belonging to this half */
107    VST1.8   {d28},[r0]              /* write row +2 */
108    ADD      r0,r6,#8                /* r0 -> row -4 of the next 8-pixel half */
109    BCC      L0x38                   /* carry set falls through to the end-of-edge code */
110L0x130:
                                        /* end of one edge: both 8-pixel halves are done */
111    SUB      r0,r0,#0x10             /* undo the two 8-pixel advances */
112    VLD1.8   {d0[]},[r7]             /* d0 = second r2 byte for the remaining edges */
113    ADD      r0,r0,r1,LSL #2         /* move 4 rows down to the next edge */
114    VLD1.8   {d2[]},[r8]             /* d2 = second r3 byte for the remaining edges */
115    BNE      L0x38                   /* Z from the last ADDS: loop until r9 has shifted out to zero */
116    MOV      r0,#0                   /* return value 0 */
117    VPOP     {d8-d15}
118    POP      {r4-r12,pc}             /* restore registers and return by loading pc */
119    .endfunc
120
121    .end                             /* end of the assembly source */
122
123