omxVCM4P10_FilterDeblockingChroma_HorEdge_I_s.S revision 78e52bfac041d71ce53b5b13c2abf78af742b09d
1/*
2 * Copyright (C) 2007-2008 ARM Limited
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 *
16 */
17/*
18 *
19 */
20
/*
 * Build attributes and assembler mode for this object.
 * Tags 24 and 25 are Tag_ABI_align_needed / Tag_ABI_align_preserved in the
 * ARM EABI build-attribute numbering (8-byte stack alignment is required
 * by, and preserved by, this code).
 */
21    .eabi_attribute 24, 1    /* Tag_ABI_align_needed = 1 */
22    .eabi_attribute 25, 1    /* Tag_ABI_align_preserved = 1 */
23
24    .arm                     /* assemble as 32-bit ARM (not Thumb) instructions */
25    .fpu neon                /* enable the NEON (Advanced SIMD) instruction set */
26    .text                    /* emit into the code section */
27
/*
 * omxVCM4P10_FilterDeblockingChroma_HorEdge_I
 *
 * Processes two horizontal edges of an 8-byte-wide block in place and
 * returns 0 in r0.  The second edge lies 4 rows below the first.
 *
 * Inputs as used by the code (argument names are NOT visible in this file;
 * the roles in parentheses are assumptions taken from the function name,
 * TODO confirm against the OpenMAX DL header):
 *   r0          pointer to the row just below the first edge (pSrcDst?)
 *   r1          row stride in bytes (srcdstStep?)
 *   r2          byte table; one byte is broadcast into d0 (pAlpha?)
 *   r3          byte table; one byte is broadcast into d2 (pBeta?)
 *   [sp,#0]     at entry: word loaded into r5 (pThresholds?)
 *   [sp,#4]     at entry: word loaded into r4; one 32-bit word is read from
 *               it per edge, 8 bytes apart (pBS?)
 *
 * Registers inside the loop:
 *   r6          the four strength bytes for the current edge; each byte
 *               governs two adjacent pixels (it is widened to a 16-bit lane)
 *   r7          loop counter: 0x40000000 doubled per edge, so exactly two
 *               iterations run before it wraps to zero
 *   r8, r9      constants 0x04040404 / 0x03030303 used to test r6
 *   d6,d5,d4    the three rows above the edge (farthest first)
 *   d8,d9,d10   the three rows below the edge (nearest first)
 *   d16         per-byte "filter this pixel" mask
 *   d12, d17    d16 narrowed by the two outer-row tests; not read again in
 *               this file (presumably inputs to the helpers - confirm)
 *
 * Saves and restores r4-r10, lr and d8-d15.
 *
 * NOTE(review): d14, d15 and r5 are set up here but never read in this
 * file, and d29, d24, d31 are read here but never written in this file.
 * They are presumably the interface to the two external helpers
 * armVCM4P10_DeblockingChromabSLT4_unsafe / ...bSGE4_unsafe; verify against
 * the helper sources before changing any register allocation.
 */
28    .global omxVCM4P10_FilterDeblockingChroma_HorEdge_I
29    .func   omxVCM4P10_FilterDeblockingChroma_HorEdge_I
30omxVCM4P10_FilterDeblockingChroma_HorEdge_I:
31    PUSH     {r4-r10,lr}                 /* 8 core registers = 0x20 bytes */
32    VPUSH    {d8-d15}                    /* 8 D registers = 0x40 bytes; stack args now at sp+0x60 */
33    VLD1.8   {d0[]},[r2]!                /* d0 = byte at [r2] in all 8 lanes; r2 += 1 */
34    SUB      r0,r0,r1,LSL #1             /* r0 -= 2*stride ... */
35    SUB      r0,r0,r1                    /* ... and one more: r0 now 3 rows above the edge */
36    VLD1.8   {d2[]},[r3]!                /* d2 = byte at [r3] in all 8 lanes; r3 += 1 */
37    LDR      r4,[sp,#0x64]               /* r4 = second stack argument (strength words) */
38    LDR      r5,[sp,#0x60]               /* r5 = first stack argument (only advanced, never read, here) */
39    LDR      r9, =0x3030303              /* mask: bits 0-1 of every strength byte */
40    LDR      r8, =0x4040404              /* mask: bit 2 of every strength byte */
41    VMOV.I8  d14,#0                      /* constant 0 per byte (not read in this file) */
42    VMOV.I8  d15,#0x1                    /* constant 1 per byte (not read in this file) */
43    VMOV.I16 d1,#0x4                     /* constant 4 per 16-bit lane, used by VTST.16 below */
44    MOV      r7,#0x40000000              /* two-pass loop counter, see ADDS r7,r7,r7 */
45L0x38:                                   /* ---- per-edge loop ---- */
46    LDR      r6,[r4],#8                  /* r6 = 4 strength bytes for this edge; r4 += 8 */
47    VLD1.8   {d6},[r0],r1                /* row edge-3 */
48    VLD1.8   {d5},[r0],r1                /* row edge-2 */
49    CMP      r6,#0                       /* all four strengths zero? consumed by BEQ below */
50    VLD1.8   {d4},[r0],r1                /* row edge-1 (last row above the edge) */
51    VLD1.8   {d8},[r0],r1                /* row edge+0 (first row below the edge) */
52    VABD.U8  d19,d6,d4                   /* d19 = |d6 - d4| */
53    VLD1.8   {d9},[r0],r1                /* row edge+1 */
54    VABD.U8  d13,d4,d8                   /* d13 = |d4 - d8|, difference across the edge */
55    VLD1.8   {d10},[r0],r1               /* row edge+2; r0 is now 3 rows below the edge */
56    BEQ      L0xe4                       /* nothing to filter on this edge */
57    VABD.U8  d12,d5,d4                   /* d12 = |d5 - d4| */
58    VABD.U8  d18,d9,d8                   /* d18 = |d9 - d8| */
59    VCGT.U8  d16,d0,d13                  /* d16 = (d0 > |d4 - d8|) per byte */
60    VMOV.32  d26[0],r6                   /* low word of d26 = the 4 strength bytes */
61    VMOVL.U8 q13,d26                     /* widen bytes to 16 bits: d26 = 4 strengths, one per pixel pair */
62    VMAX.U8  d12,d18,d12                 /* d12 = max(|d9 - d8|, |d5 - d4|) */
63    VABD.U8  d17,d10,d8                  /* d17 = |d10 - d8| */
64    VCGT.S16 d27,d26,#0                  /* d27 = 0xFFFF where strength != 0 (overwrites widened upper half) */
65    VCGT.U8  d12,d2,d12                  /* d12 = (d2 > both inner differences) */
66    VCGT.U8  d19,d2,d19                  /* d19 = (d2 > |d6 - d4|) */
67    VAND     d16,d16,d27                 /* drop pixels whose strength is zero */
68    TST      r6,r9                       /* any strength byte with bit 0 or 1 set? consumed by BLNE below */
69    VCGT.U8  d17,d2,d17                  /* d17 = (d2 > |d10 - d8|) */
70    VAND     d16,d16,d12                 /* d16 = final per-byte filter mask */
71    VAND     d12,d16,d17                 /* d12 = d16 AND below-side outer test */
72    VAND     d17,d16,d19                 /* d17 = d16 AND above-side outer test */
73    BLNE     armVCM4P10_DeblockingChromabSLT4_unsafe   /* external helper; clobbers lr (saved on entry) */
74    TST      r6,r8                       /* any strength byte with bit 2 set? consumed by BLNE below */
75    SUB      r0,r0,r1,LSL #2             /* r0 back 4 rows: points at row edge-1 for the stores */
76    VTST.16  d26,d26,d1                  /* d26 = 0xFFFF per pixel pair where strength has bit 2 set */
77    BLNE     armVCM4P10_DeblockingChromabSGE4_unsafe   /* external helper */
78    VBIT     d29,d13,d26                 /* where d26 set: take d13 (presumably helper output - confirm) */
79    VBIT     d24,d31,d26                 /* where d26 set: take d31 (presumably helper output - confirm) */
80    VBIF     d29,d4,d16                  /* where filter mask clear: keep original row edge-1 */
81    VBIF     d24,d8,d16                  /* where filter mask clear: keep original row edge+0 */
82    VST1.8   {d29},[r0],r1               /* write back row edge-1 */
83    ADDS     r7,r7,r7                    /* counter: 0x40000000 -> 0x80000000 -> 0 (Z set on 2nd pass) */
84    VST1.8   {d24},[r0],r1               /* write back row edge+0; r0 = edge+1 = 3 rows above next edge */
85    BNE      L0x38                       /* second edge still to do */
86    MOV      r0,#0                       /* return value 0 */
87    VPOP     {d8-d15}
88    POP      {r4-r10,pc}                 /* restore and return */
89L0xe4:                                   /* ---- edge skipped: all strengths were zero ---- */
90    VLD1.8   {d0[]},[r2]                 /* reload d0 from [r2] (r2 was advanced in the prologue), no writeback */
91    SUB      r0,r0,r1,LSL #1             /* r0 back 2 rows: edge+1, same position the filter path leaves */
92    ADDS     r7,r7,r7                    /* same two-pass counter step as the filter path */
93    VLD1.8   {d2[]},[r3]                 /* reload d2 from [r3], no writeback */
94    ADD      r5,r5,#4                    /* advance r5 by 4 bytes (the filter path does not do this here; presumably a helper does - confirm) */
95    BNE      L0x38                       /* second edge still to do */
96    MOV      r0,#0                       /* return value 0 */
97    VPOP     {d8-d15}
98    POP      {r4-r10,pc}                 /* restore and return */
99    .endfunc
100
101    .end
102
103