armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe_s.S revision 78e52bfac041d71ce53b5b13c2abf78af742b09d
1/*
2 * Copyright (C) 2007-2008 ARM Limited
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 *
16 */
17/*
18 *
19 */
20
21    .eabi_attribute 24, 1
22    .eabi_attribute 25, 1
23
24    .arm
25    .fpu neon
26    .text
27
/*
 * armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
 *
 * Runs a symmetric 6-tap filter down the columns of nine consecutive
 * source rows (called R0..R8 below) and produces four output rows.
 * For output row n (n = 0..3), on the low four 16-bit lanes:
 *
 *     acc = (R[n] + R[n+5]) - d30 * (R[n+1] + R[n+4])
 *                           + d31 * (R[n+2] + R[n+3])
 *     out = unsigned 8-bit saturate((acc + 16) >> 5)
 *
 * Register interface, as visible from this code (not a standard
 * procedure-call interface):
 *   In:   r0       address of row R0
 *         r1       byte step added to the address after each row load
 *         d30,d31  16-bit per-lane multipliers, not set in this routine;
 *                  presumably 5 and 20 (H.264 half-sample taps) loaded by
 *                  the caller -- TODO confirm against caller
 *   Out:  d0,d2,d4,d6  output rows 0..3. Only bytes 0-3 of each hold
 *                  filtered samples; bytes 4-7 are narrowed from the
 *                  unfiltered outer-tap sums left in d1/d3/d5/d7.
 *         r0       advanced by 5 * r1 (five post-incremented loads)
 *   Clobbers: d0-d22. r12 is used as a second row pointer but is
 *             restored by the closing POP.
 *
 * NOTE(review): d8-d15 are overwritten without being saved here; they
 * are callee-saved under the ARM procedure call standard, so callers
 * presumably preserve them -- confirm.
 * NOTE(review): of the registers in the PUSH/POP list only r12 is
 * written by this body; the wider list is presumably kept for
 * consistency with sibling routines -- confirm before narrowing it.
 */
28    .global armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
29    .func   armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
30armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe:
31    PUSH     {r4-r12,lr} /* save r4-r12 and the return address */
32    VLD1.8   {d7},[r0],r1 /* d7 = R0 (8 bytes); r0 -> R1 */
33    ADD      r12,r0,r1,LSL #2 /* r12 = r0 + 4*r1 -> R5: second pointer so R5..R8 loads interleave with R1..R4 */
34    VLD1.8   {d8},[r0],r1 /* d8 = R1 */
35    VLD1.8   {d12},[r12],r1 /* d12 = R5 */
36    VLD1.8   {d9},[r0],r1 /* d9 = R2 */
37    VADDL.U8 q0,d7,d12 /* row 0 outer taps: q0 = R0 + R5, widened to 16 bit */
38    VLD1.8   {d10},[r0],r1 /* d10 = R3 */
39    VLD1.8   {d13},[r12],r1 /* d13 = R6 */
40    VLD1.8   {d11},[r0],r1 /* d11 = R4; r0 now -> R5 */
41    VLD1.8   {d14},[r12],r1 /* d14 = R7 */
42    VADDL.U8 q8,d8,d11 /* row 0: q8 = R1 + R4 */
43    VADDL.U8 q9,d9,d10 /* row 0: q9 = R2 + R3 */
44    VLD1.8   {d15},[r12],r1 /* d15 = R8 */
45    VMLS.I16 d0,d16,d30 /* row 0: d0 -= (R1 + R4) * d30, low 4 lanes only */
46    VMUL.I16 d20,d18,d31 /* row 0: d20 = (R2 + R3) * d31, added into d0 later */
47    VADDL.U8 q8,d9,d12 /* row 1: q8 = R2 + R5 (q8 reused) */
48    VADDL.U8 q9,d10,d11 /* row 1: q9 = R3 + R4 (q9 reused) */
49    VADDL.U8 q1,d8,d13 /* row 1 outer taps: q1 = R1 + R6 */
50    VMLS.I16 d2,d16,d30 /* row 1: d2 -= (R2 + R5) * d30 */
51    VMUL.I16 d21,d18,d31 /* row 1: d21 = (R3 + R4) * d31 */
52    VADDL.U8 q8,d10,d13 /* row 2: q8 = R3 + R6 */
53    VADDL.U8 q9,d11,d12 /* row 2: q9 = R4 + R5 */
54    VADDL.U8 q2,d9,d14 /* row 2 outer taps: q2 = R2 + R7 */
55    VMLS.I16 d4,d16,d30 /* row 2: d4 -= (R3 + R6) * d30 */
56    VMUL.I16 d22,d18,d31 /* row 2: d22 = (R4 + R5) * d31 */
57    VADDL.U8 q8,d11,d14 /* row 3: q8 = R4 + R7 */
58    VADDL.U8 q3,d10,d15 /* row 3 outer taps: q3 = R3 + R8; overwrites d7 (R0), which is no longer needed */
59    VADDL.U8 q9,d12,d13 /* row 3: q9 = R5 + R6 */
60    VMLS.I16 d6,d16,d30 /* row 3: d6 -= (R4 + R7) * d30 */
61    VADD.I16 d0,d0,d20 /* row 0: add the deferred centre-tap product */
62    VADD.I16 d2,d2,d21 /* row 1: add the deferred centre-tap product */
63    VADD.I16 d4,d4,d22 /* row 2: add the deferred centre-tap product */
64    VMLA.I16 d6,d18,d31 /* row 3: d6 += (R5 + R6) * d31, accumulated directly */
65    VQRSHRUN.S16 d0,q0,#5 /* row 0: round, shift right 5, saturate to u8; bytes 4-7 come from unfiltered d1 */
66    VQRSHRUN.S16 d2,q1,#5 /* row 1: same narrowing; bytes 4-7 come from unfiltered d3 */
67    VQRSHRUN.S16 d4,q2,#5 /* row 2: same narrowing; bytes 4-7 come from unfiltered d5 */
68    VQRSHRUN.S16 d6,q3,#5 /* row 3: same narrowing; bytes 4-7 come from unfiltered d7 */
69    POP      {r4-r12,pc} /* restore r4-r12 (undoing the r12 use above) and return by loading pc */
70    .endfunc
71
72    .end
73
74