armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe_s.S revision 7ea582e1dbdd9a88b2105fbe29ed0ec92cbf70c6
1/*
2 * Copyright (C) 2007-2008 ARM Limited
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 *
16 */
17/*
18 *
19 */
20
      /* EABI build attributes recorded in the object file:
       *   tag 24 (Tag_ABI_align_needed)    = 1 : code relies on 8-byte stack alignment
       *   tag 25 (Tag_ABI_align_preserved) = 1 : code keeps the stack 8-byte aligned
       */
21    .eabi_attribute 24, 1
22    .eabi_attribute 25, 1
23
      /* Assemble as ARM (not Thumb) instructions, allow NEON mnemonics,
       * and place everything that follows in the code section. */
24    .arm
25    .fpu neon
26    .text
27
/*
 * armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
 *
 * Vertical six-tap filter over nine consecutive source rows, producing four
 * output rows.  This routine uses a private register interface, not the
 * standard procedure-call convention.
 *
 * In:
 *   r0       address of the first (topmost) source row that is read
 *   r1       byte distance between successive rows
 *   d30,d31  16-bit per-lane multipliers; they are read here but never
 *            written, so the caller must preload them.
 *            NOTE(review): presumably 5 and 20 in every lane, i.e. the H.264
 *            (1,-5,20,20,-5,1) half-sample filter - confirm against callers.
 *
 * Out:
 *   d0, d2, d4, d6   output rows 0..3 as unsigned bytes.  For output row n:
 *       sat_u8((row[n] + row[n+5]
 *               - d30 * (row[n+1] + row[n+4])
 *               + d31 * (row[n+2] + row[n+3]) + 16) >> 5)
 *     Only the low four bytes of each result are fully filtered: the
 *     multiply/accumulate steps operate on the low D half of q0..q3 only,
 *     so the upper four bytes are narrowed from the outer-tap sum alone.
 *   r0       advanced by 5 * r1 (five post-incremented loads)
 *
 * Memory: reads 8 bytes from each of the 9 rows starting at r0; writes none.
 *
 * Clobbers: d0-d22.  r12 is used as a second row pointer but is restored by
 *   the final POP along with r4-r11.
 *   NOTE(review): d8-d15 are callee-saved under AAPCS and are overwritten
 *   here without being saved - callers must account for that.
 */
28    .global armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
29armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe:
      /* Save r4-r12 and lr (10 words, so the stack stays 8-byte aligned). */
30    PUSH     {r4-r12,lr}
      /* Row loads, interleaved with the first sums.  Each VLD1.8 fetches
       * 8 bytes and then steps its pointer by r1.
       *   r0  walks rows 0..4 -> d7, d8, d9, d10, d11
       *   r12 walks rows 5..8 -> d12, d13, d14, d15
       * r12 is formed after r0 has already stepped once, so it equals
       * (original r0) + 5*r1. */
31    VLD1.8   {d7},[r0],r1
32    ADD      r12,r0,r1,LSL #2
33    VLD1.8   {d8},[r0],r1
34    VLD1.8   {d12},[r12],r1
35    VLD1.8   {d9},[r0],r1
      /* Output row 0, outer taps: q0 = row0 + row5 (widened to 16 bits). */
36    VADDL.U8 q0,d7,d12
37    VLD1.8   {d10},[r0],r1
38    VLD1.8   {d13},[r12],r1
39    VLD1.8   {d11},[r0],r1
40    VLD1.8   {d14},[r12],r1
      /* Output row 0: q8 = row1 + row4, q9 = row2 + row3. */
41    VADDL.U8 q8,d8,d11
42    VADDL.U8 q9,d9,d10
43    VLD1.8   {d15},[r12],r1
      /* d0 -= d30 * (row1+row4); d20 = d31 * (row2+row3), added in later.
       * q8/q9 are scratch and are recomputed for every output row. */
44    VMLS.I16 d0,d16,d30
45    VMUL.I16 d20,d18,d31
      /* Output row 1: taps (row1,row6), (row2,row5), (row3,row4). */
46    VADDL.U8 q8,d9,d12
47    VADDL.U8 q9,d10,d11
48    VADDL.U8 q1,d8,d13
49    VMLS.I16 d2,d16,d30
50    VMUL.I16 d21,d18,d31
      /* Output row 2: taps (row2,row7), (row3,row6), (row4,row5). */
51    VADDL.U8 q8,d10,d13
52    VADDL.U8 q9,d11,d12
53    VADDL.U8 q2,d9,d14
54    VMLS.I16 d4,d16,d30
55    VMUL.I16 d22,d18,d31
      /* Output row 3: taps (row3,row8), (row4,row7), (row5,row6).
       * Writing q3 overwrites d7; row 0 is no longer needed by this point. */
56    VADDL.U8 q8,d11,d14
57    VADDL.U8 q3,d10,d15
58    VADDL.U8 q9,d12,d13
59    VMLS.I16 d6,d16,d30
      /* Fold in the centre-tap products.  Rows 0..2 use the products parked
       * in d20..d22; row 3 accumulates directly with VMLA. */
60    VADD.I16 d0,d0,d20
61    VADD.I16 d2,d2,d21
62    VADD.I16 d4,d4,d22
63    VMLA.I16 d6,d18,d31
      /* Round (+16), shift right by 5, and saturate the signed 16-bit sums
       * to unsigned bytes.  Each instruction narrows a whole Q register into
       * its own low D half. */
64    VQRSHRUN.S16 d0,q0,#5
65    VQRSHRUN.S16 d2,q1,#5
66    VQRSHRUN.S16 d4,q2,#5
67    VQRSHRUN.S16 d6,q3,#5
      /* Restore r4-r12 and return by loading the saved lr into pc. */
68    POP      {r4-r12,pc}
69
70    .end
71
72