/* armVCM4P10_Interpolate_Chroma_s.S revision 7ea582e1dbdd9a88b2105fbe29ed0ec92cbf70c6 */
/*
 * Copyright (C) 2007-2008 ARM Limited
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */
/*
 *
 */

/*
 * Assembler setup for the whole file: GNU as syntax, ARM (A32) state,
 * NEON enabled, code placed in .text.
 */
    .eabi_attribute 24, 1           @ Tag_ABI_align_needed: code may rely on 8-byte alignment
    .eabi_attribute 25, 1           @ Tag_ABI_align_preserved: code keeps 8-byte alignment

    .arm                            @ assemble ARM encodings, not Thumb
    .fpu neon                       @ accept NEON (Advanced SIMD) mnemonics

    .text
    .align 4                        @ on ARM the operand is a power of two: 16-byte boundary

/*
 * Dispatch table for the interpolating row loops (dx + dy != 0).
 *
 * The entry code loads a word from this table at byte offset width * 2,
 * so width 2 reads byte offset 4, width 4 reads byte offset 8 and width 8
 * reads byte offset 16. The words at byte offsets 0 and 12 are not reached
 * by those three widths and simply repeat their neighbour.
 *
 * Each word is the distance from P0+8 to the target label. P0+8 is the
 * value pc reads as in the ADD at P0, so adding the word to pc lands on
 * the label.
 */
armVCM4P10_WidthBranchTableMVIsNotZero:
    .word   WidthIs2MVIsNotZero-(P0+8), WidthIs2MVIsNotZero-(P0+8)   @ byte offsets 0, 4
    .word   WidthIs4MVIsNotZero-(P0+8), WidthIs4MVIsNotZero-(P0+8)   @ byte offsets 8, 12
    .word   WidthIs8MVIsNotZero-(P0+8)                               @ byte offset 16

/*
 * Dispatch table for the plain-copy row loops (dx + dy == 0).
 *
 * Indexed by the entry code at byte offset width * 2: width 2 reads byte
 * offset 4, width 4 reads byte offset 8, width 8 reads byte offset 16.
 * The words at byte offsets 0 and 12 repeat their neighbour.
 *
 * Each word is the distance from P0+8 (the value pc reads as in the ADD
 * at P0) to the target label.
 */
armVCM4P10_WidthBranchTableMVIsZero:
    .word   WidthIs2MVIsZero-(P0+8), WidthIs2MVIsZero-(P0+8)         @ byte offsets 0, 4
    .word   WidthIs4MVIsZero-(P0+8), WidthIs4MVIsZero-(P0+8)         @ byte offsets 8, 12
    .word   WidthIs8MVIsZero-(P0+8)                                  @ byte offset 16

/*
 * armVCM4P10_Interpolate_Chroma
 *
 * Entry point. Saves registers, builds the four bilinear weights, preloads
 * the first source row and jumps to the row loop selected by the block
 * width and by whether dx + dy is zero.
 *
 * Inputs as this code reads them (the names are reviewer labels derived
 * from how each value is used -- confirm against the C prototype):
 *   r0               source address
 *   r1               source stride in bytes
 *   r2               destination address (consumed by the row loops)
 *   r3               destination stride in bytes (consumed by the row loops)
 *   stack word 0     width; the tables are laid out for 2, 4 and 8
 *   stack word 1     height, used as the row counter
 *   stack word 2     dx
 *   stack word 3     dy
 *
 * State handed to the row loops:
 *   d0, d1           first source row at byte offsets 0 and +1
 *   r0               address of the second source row
 *   r10, lr          post-increments of 1 and stride - 1
 *   d12..d15         (8-dx)(8-dy), dx(8-dy), (8-dx)dy, dx*dy in every byte
 *   r5               height
 *
 * Width, height, dx and dy are not range checked here.
 *
 * The .type directive marks the exported symbol as a function so that the
 * linker can treat calls to it as calls to ARM-state code.
 */
    .global armVCM4P10_Interpolate_Chroma
    .type   armVCM4P10_Interpolate_Chroma, %function
armVCM4P10_Interpolate_Chroma:
    PUSH     {r4-r12,lr}            @ 10 core registers = 0x28 bytes
    VPUSH    {d8-d15}               @ 8 D registers = 0x40 bytes; stack args now at sp+0x68
    LDRD     r6,r7,[sp,#0x70]       @ r6 = dx, r7 = dy (stack words 2 and 3)
    LDRD     r4,r5,[sp,#0x68]       @ r4 = width, r5 = height (stack words 0 and 1)
    RSB      r8,r6,#8               @ r8 = 8 - dx
    RSB      r9,r7,#8               @ r9 = 8 - dy
    CMN      r6,r7                  @ flags from dx + dy; EQ when the sum is zero
    MOV      r10,#1                 @ one-byte post-increment
    ADREQ    r11, armVCM4P10_WidthBranchTableMVIsZero
    SUB      lr,r1,r10              @ lr = stride - 1
    ADRNE    r11, armVCM4P10_WidthBranchTableMVIsNotZero
    VLD1.8   {d0},[r0],r10          @ d0 = row 0 bytes 0..7; r0 += 1
    SMULBB   r12,r8,r9              @ r12 = (8-dx)*(8-dy)
    SMULBB   r9,r6,r9               @ r9  = dx*(8-dy)
    VLD1.8   {d1},[r0],lr           @ d1 = row 0 bytes 1..8; r0 now at row 1
    SMULBB   r8,r8,r7               @ r8  = (8-dx)*dy
    SMULBB   r6,r6,r7               @ r6  = dx*dy
    VDUP.8   d12,r12                @ replicate each weight into all 8 byte lanes
    VDUP.8   d13,r9
    VDUP.8   d14,r8
    VDUP.8   d15,r6
    LDR      r11,[r11, r4, lsl #1]  @ table word at byte offset width * 2
P0: ADD      pc,r11                 @ pc reads as P0+8, so this jumps to the chosen label

/*
 * WidthIs8MVIsNotZero: interpolating path for width 8, four output rows
 * per loop pass.
 *
 * At the top of every pass:
 *   d0, d1     top source row of the pass at byte offsets 0 and +1
 *   r0         address of the next source row
 *   r10, lr    post-increments of 1 and stride - 1, so each pair of loads
 *              reads one row at offsets 0 and +1 and then moves down a row
 *   d12..d15   the four weights set up by the entry code
 *   r5         rows still to produce
 *
 * Each output row is
 *   top[x]*d12 + top[x+1]*d13 + bottom[x]*d14 + bottom[x+1]*d15
 * narrowed by a rounding, saturating shift right by 6.
 *
 * Loads and arithmetic for different rows are interleaved. d0 and d1 are
 * reloaded for the next pass only after q2 has consumed the old values,
 * so the instruction order must be kept.
 */
WidthIs8MVIsNotZero:
    VLD1.8   {d2},[r0],r10          @ row+1, offset 0
    VMULL.U8 q2,d0,d12              @ q2  = output row 0, first term
    VLD1.8   {d3},[r0],lr           @ row+1, offset 1; r0 -> row+2
    VMULL.U8 q3,d2,d12              @ q3  = output row 1, first term
    VLD1.8   {d16},[r0],r10         @ row+2, offset 0
    VMLAL.U8 q2,d1,d13              @ last read of the old d1
    VLD1.8   {d17},[r0],lr          @ row+2, offset 1; r0 -> row+3
    VMULL.U8 q11,d16,d12            @ q11 = output row 2, first term
    VMLAL.U8 q3,d3,d13
    VLD1.8   {d18},[r0],r10         @ row+3, offset 0
    VMLAL.U8 q2,d2,d14
    VMLAL.U8 q11,d17,d13
    VMULL.U8 q12,d18,d12            @ q12 = output row 3, first term
    VLD1.8   {d19},[r0],lr          @ row+3, offset 1; r0 -> row+4
    VMLAL.U8 q3,d16,d14
    VLD1.8   {d0},[r0],r10          @ row+4, offset 0: top row of the next pass
    VMLAL.U8 q12,d19,d13
    VMLAL.U8 q11,d18,d14
    VMLAL.U8 q2,d3,d15              @ q2 complete
    VLD1.8   {d1},[r0],lr           @ row+4, offset 1; r0 -> row+5
    VMLAL.U8 q12,d0,d14             @ uses the freshly loaded row+4
    VMLAL.U8 q3,d17,d15             @ q3 complete
    VMLAL.U8 q11,d19,d15            @ q11 complete
    SUBS     r5,r5,#4               @ four rows done; the NEON ops below leave these flags alone
    VMLAL.U8 q12,d1,d15             @ q12 complete
    VQRSHRN.U16 d8,q2,#6            @ (sum + 32) >> 6, saturated to 8 bits
    VQRSHRN.U16 d9,q3,#6
    VQRSHRN.U16 d20,q11,#6
    VST1.64  {d8},[r2],r3           @ store 8 bytes, then advance by the destination stride
    VQRSHRN.U16 d21,q12,#6
    VST1.64  {d9},[r2],r3
    VST1.64  {d20},[r2],r3
    VST1.64  {d21},[r2],r3
    BGT      WidthIs8MVIsNotZero    @ loop while r5 > 0
    MOV      r0,#0                  @ return value 0
    VPOP     {d8-d15}
    POP      {r4-r12,pc}            @ restore saved registers and return

/*
 * WidthIs4MVIsNotZero: interpolating path for width 4, two output rows
 * per loop pass.
 *
 * At the top of every pass d0 and d1 hold the top source row at byte
 * offsets 0 and +1, r0 addresses the next source row, r10 and lr are the
 * post-increments 1 and stride - 1, d12..d15 are the four weights and r5
 * counts the rows still to produce.
 *
 * All eight lanes are computed, but only the low four result bytes of
 * each row are stored.
 */
WidthIs4MVIsNotZero:
    VLD1.8   {d2},[r0],r10          @ row+1, offset 0
    VMULL.U8 q2,d0,d12              @ q2 = output row 0, first term
    VMULL.U8 q3,d2,d12              @ q3 = output row 1, first term
    VLD1.8   {d3},[r0],lr           @ row+1, offset 1; r0 -> row+2
    VMLAL.U8 q2,d1,d13              @ last read of the old d1
    VMLAL.U8 q3,d3,d13
    VLD1.8   {d0},[r0],r10          @ row+2, offset 0: also the top row of the next pass
    VMLAL.U8 q2,d2,d14
    VMLAL.U8 q3,d0,d14
    VLD1.8   {d1},[r0],lr           @ row+2, offset 1; r0 -> row+3
    SUBS     r5,r5,#2               @ two rows done; the NEON ops below leave these flags alone
    VMLAL.U8 q3,d1,d15              @ q3 complete
    VMLAL.U8 q2,d3,d15              @ q2 complete
    VQRSHRN.U16 d9,q3,#6            @ (sum + 32) >> 6, saturated to 8 bits
    VQRSHRN.U16 d8,q2,#6
    VST1.32  {d8[0]},[r2],r3        @ store the low 4 bytes, then advance by the destination stride
    VST1.32  {d9[0]},[r2],r3
    BGT      WidthIs4MVIsNotZero    @ loop while r5 > 0
    MOV      r0,#0                  @ return value 0
    VPOP     {d8-d15}
    POP      {r4-r12,pc}            @ restore saved registers and return

/*
 * WidthIs2MVIsNotZero: interpolating path for width 2, two output rows
 * per loop pass.
 *
 * At the top of every pass d0 and d1 hold the top source row at byte
 * offsets 0 and +1, r0 addresses the next source row, r10 and lr are the
 * post-increments 1 and stride - 1, d12..d15 are the four weights and r5
 * counts the rows still to produce.
 *
 * All eight lanes are computed, but only the low two result bytes of
 * each row are stored.
 */
WidthIs2MVIsNotZero:
    VLD1.8   {d2},[r0],r10          @ row+1, offset 0
    VMULL.U8 q2,d0,d12              @ q2 = output row 0, first term
    VMULL.U8 q3,d2,d12              @ q3 = output row 1, first term
    VLD1.8   {d3},[r0],lr           @ row+1, offset 1; r0 -> row+2
    VMLAL.U8 q2,d1,d13              @ last read of the old d1
    VMLAL.U8 q3,d3,d13
    VLD1.8   {d0},[r0],r10          @ row+2, offset 0: also the top row of the next pass
    VMLAL.U8 q2,d2,d14
    VMLAL.U8 q3,d0,d14
    VLD1.8   {d1},[r0],lr           @ row+2, offset 1; r0 -> row+3
    SUBS     r5,r5,#2               @ two rows done; the NEON ops below leave these flags alone
    VMLAL.U8 q3,d1,d15              @ q3 complete
    VMLAL.U8 q2,d3,d15              @ q2 complete
    VQRSHRN.U16 d9,q3,#6            @ (sum + 32) >> 6, saturated to 8 bits
    VQRSHRN.U16 d8,q2,#6
    VST1.16  {d8[0]},[r2],r3        @ store the low 2 bytes, then advance by the destination stride
    VST1.16  {d9[0]},[r2],r3
    BGT      WidthIs2MVIsNotZero    @ loop while r5 > 0
    MOV      r0,#0                  @ return value 0
    VPOP     {d8-d15}
    POP      {r4-r12,pc}            @ restore saved registers and return

/*
 * WidthIs8MVIsZero: copy path for width 8 (dx + dy == 0), two rows per
 * loop pass.
 *
 * The entry code left r0 one row below the start, so it is moved back by
 * one stride and the first row is loaded again inside the loop.
 * r1 and r3 are the source and destination strides, r5 the rows left.
 */
WidthIs8MVIsZero:
    SUB      r0,r0,r1               @ back to the first source row
WidthIs8LoopMVIsZero:
    VLD1.8   {d0},[r0],r1           @ load 8 bytes, advance one source row
    SUBS     r5,r5,#2               @ two rows per pass; NEON ops below leave the flags alone
    VLD1.8   {d1},[r0],r1
    VST1.64  {d0},[r2],r3           @ store 8 bytes, advance one destination row
    VST1.64  {d1},[r2],r3
    BGT      WidthIs8LoopMVIsZero   @ loop while r5 > 0
    MOV      r0,#0                  @ return value 0
    VPOP     {d8-d15}
    POP      {r4-r12,pc}            @ restore saved registers and return

/*
 * WidthIs4MVIsZero: copy path for width 4 (dx + dy == 0), two rows per
 * loop pass.
 *
 * On entry to each pass d0 already holds the row to store first (the
 * entry code loaded the first row) and r0 addresses the row after it.
 * Each load fetches 8 bytes but only the low 4 are stored.
 * The d0 load on the final pass fetches a row that is never stored.
 */
WidthIs4MVIsZero:
    VLD1.8   {d1},[r0],r1           @ second row of the pass
    SUBS     r5,r5,#2               @ two rows per pass; NEON ops below leave the flags alone
    VST1.32  {d0[0]},[r2],r3        @ store low 4 bytes of the first row
    VLD1.8   {d0},[r0],r1           @ first row of the next pass
    VST1.32  {d1[0]},[r2],r3        @ store low 4 bytes of the second row
    BGT      WidthIs4MVIsZero       @ loop while r5 > 0
    MOV      r0,#0                  @ return value 0
    VPOP     {d8-d15}
    POP      {r4-r12,pc}            @ restore saved registers and return

/*
 * WidthIs2MVIsZero: copy path for width 2 (dx + dy == 0), two rows per
 * loop pass.
 *
 * On entry to each pass d0 already holds the row to store first (the
 * entry code loaded the first row) and r0 addresses the row after it.
 * Each load fetches 8 bytes but only the low 2 are stored.
 * The d0 load on the final pass fetches a row that is never stored.
 */
WidthIs2MVIsZero:
    VLD1.8   {d1},[r0],r1           @ second row of the pass
    SUBS     r5,r5,#2               @ two rows per pass; NEON ops below leave the flags alone
    VST1.16  {d0[0]},[r2],r3        @ store low 2 bytes of the first row
    VLD1.8   {d0},[r0],r1           @ first row of the next pass
    VST1.16  {d1[0]},[r2],r3        @ store low 2 bytes of the second row
    BGT      WidthIs2MVIsZero       @ loop while r5 > 0
    MOV      r0,#0                  @ return value 0
    VPOP     {d8-d15}
    POP      {r4-r12,pc}            @ restore saved registers and return

/* End of the assembly source. */
    .end