armVCM4P10_Interpolate_Chroma_s.S revision 78e52bfac041d71ce53b5b13c2abf78af742b09d
1/*
2 * Copyright (C) 2007-2008 ARM Limited
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 *
16 */
17/*
18 *
19 */
20
/*
 * Assembler setup: object-file build attributes, instruction set, FPU
 * extension and section/alignment for everything that follows.
 */
21    .eabi_attribute 24, 1               /* build attribute tag 24 = 1 (Tag_ABI_align_needed in the ARM build-attributes addendum - confirm) */
22    .eabi_attribute 25, 1               /* build attribute tag 25 = 1 (Tag_ABI_align_preserved in the same addendum - confirm) */
23
24    .arm                                /* ARM (A32) encoding; the P0+8 offsets in the branch tables below depend on it */
25    .fpu neon                           /* accept the NEON (Advanced SIMD) mnemonics used by the routine */
26
27    .text
28    .align 4                            /* on ARM gas .align takes a power of two: 2^4 = 16-byte alignment */
29
/*
 * Dispatch table for the interpolating paths (selected when the two
 * fractional weights do not sum to zero).
 *
 * The dispatcher loads from byte offset (width << 1), so:
 *   width 2 -> byte offset  4 -> word 1 -> WidthIs2MVIsNotZero
 *   width 4 -> byte offset  8 -> word 2 -> WidthIs4MVIsNotZero
 *   width 8 -> byte offset 16 -> word 4 -> WidthIs8MVIsNotZero
 * Words 0 and 3 are duplicates that are only reached for byte offsets
 * 0 and 12 (width values 0 and 6).
 *
 * Each entry is the target address minus (P0+8). It is added to pc by the
 * "ADD pc,r11" at label P0, where pc reads as P0+8, so the table holds
 * only relative distances and no absolute addresses.
 */
30armVCM4P10_WidthBranchTableMVIsNotZero:
31    .word   WidthIs2MVIsNotZero-(P0+8), WidthIs2MVIsNotZero-(P0+8)      /* words 0,1 */
32    .word   WidthIs4MVIsNotZero-(P0+8), WidthIs4MVIsNotZero-(P0+8)      /* words 2,3 */
33    .word   WidthIs8MVIsNotZero-(P0+8)                                  /* word 4 */
34
/*
 * Dispatch table for the copy-only paths (selected when the two fractional
 * weights sum to zero).
 *
 * Same layout as the interpolating table: the dispatcher loads from byte
 * offset (width << 1), so width 2 picks word 1, width 4 picks word 2 and
 * width 8 picks word 4; words 0 and 3 are duplicate entries.
 * Entries are target minus (P0+8), to be added to pc at label P0.
 */
35armVCM4P10_WidthBranchTableMVIsZero:
36    .word   WidthIs2MVIsZero-(P0+8), WidthIs2MVIsZero-(P0+8)            /* words 0,1 */
37    .word   WidthIs4MVIsZero-(P0+8), WidthIs4MVIsZero-(P0+8)            /* words 2,3 */
38    .word   WidthIs8MVIsZero-(P0+8)                                     /* word 4 */
39
/*
 * armVCM4P10_Interpolate_Chroma
 *
 * Writes a block of bytes to the destination. Each output sample is either
 * a four-tap weighted blend of a source sample with its right, lower and
 * lower-right neighbours, or a plain copy of the source when both
 * fractional weights are zero.
 *
 * Register arguments, as used by the code:
 *   r0  source pointer
 *   r1  source row stride in bytes
 *   r2  destination pointer
 *   r3  destination row stride in bytes
 * Stack arguments (the first four words at the caller's sp):
 *   word 0 -> r4  block width; only 2, 4 and 8 have their own table entry
 *   word 1 -> r5  row count; the loops consume 2 rows per pass, except the
 *                 width-8 interpolating loop, which consumes 4
 *   word 2 -> r6  horizontal fraction fx (weights the x+1 sample)
 *   word 3 -> r7  vertical fraction fy (weights the next row)
 *   fx and fy are used in eighths (8-fx, 8-fy, final shift by 6);
 *   presumably 0..7 - confirm against callers.
 *
 * Weights, each replicated into every byte lane:
 *   d12 = (8-fx)*(8-fy)   for sample (x,   y)
 *   d13 =    fx *(8-fy)   for sample (x+1, y)
 *   d14 = (8-fx)*   fy    for sample (x,   y+1)
 *   d15 =    fx *   fy    for sample (x+1, y+1)
 *   out = saturate_u8((d12*a + d13*b + d14*c + d15*d + 32) >> 6)
 *
 * Returns r0 = 0 on every path. r4-r12 and d8-d15 are saved and restored;
 * d0-d7, d16-d25 (q0-q3, q8-q12) and the flags are modified.
 *
 * NOTE: every source load fetches 8 bytes regardless of width. The
 * interpolating paths also load from x+1 and from the row after the last
 * output row. The width-2 and width-4 copy paths load one row beyond the
 * last row they store. The source buffer must tolerate these reads.
 */
40    .global armVCM4P10_Interpolate_Chroma
41    .func   armVCM4P10_Interpolate_Chroma
42armVCM4P10_Interpolate_Chroma:
43    PUSH     {r4-r12,lr}                /* 10 words = 0x28 bytes */
44    VPUSH    {d8-d15}                   /* 8 doublewords = 0x40 bytes; sp = entry sp - 0x68 */
45    LDRD     r6,r7,[sp,#0x70]           /* r6 = fx, r7 = fy (stack words 2 and 3) */
46    LDRD     r4,r5,[sp,#0x68]           /* r4 = width, r5 = row count (stack words 0 and 1) */
47    RSB      r8,r6,#8                   /* r8 = 8 - fx */
48    RSB      r9,r7,#8                   /* r9 = 8 - fy */
49    CMN      r6,r7                      /* flags from fx + fy; EQ when the sum is zero */
50    MOV      r10,#1                     /* r10 = 1: post-increment that steps from x to x+1 */
51    ADREQ    r11, armVCM4P10_WidthBranchTableMVIsZero
52    SUB      lr,r1,r10                  /* lr = stride - 1: steps from x+1 to x of the next row */
53    ADRNE    r11, armVCM4P10_WidthBranchTableMVIsNotZero     /* MOV/SUB above leave the CMN flags intact */
54    VLD1.8   {d0},[r0],r10              /* d0 = first row, bytes x..x+7 */
55    SMULBB   r12,r8,r9                  /* r12 = (8-fx)*(8-fy) */
56    SMULBB   r9,r6,r9                   /* r9  = fx*(8-fy) */
57    VLD1.8   {d1},[r0],lr               /* d1 = first row, bytes x+1..x+8; r0 now at the second row */
58    SMULBB   r8,r8,r7                   /* r8  = (8-fx)*fy */
59    SMULBB   r6,r6,r7                   /* r6  = fx*fy */
60    VDUP.8   d12,r12                    /* broadcast the low byte of each weight to all 8 lanes */
61    VDUP.8   d13,r9
62    VDUP.8   d14,r8
63    VDUP.8   d15,r6
64    LDR      r11,[r11, r4, lsl #1]      /* table entry at byte offset width*2 */
65P0: ADD      pc,r11                     /* pc reads as P0+8, so this lands on the selected label */
66
/*
 * Width 8, interpolating. Loop invariant on entry: d0/d1 hold the current
 * row at x and x+1, and r0 points at the next row. Each pass loads four
 * more row pairs (d2/d3, d16/d17, d18/d19, then d0/d1 again) and emits
 * four 8-byte output rows from q2, q3, q11 and q12.
 */
67WidthIs8MVIsNotZero:
68    VLD1.8   {d2},[r0],r10
69    VMULL.U8 q2,d0,d12                  /* out row 0 accumulates in q2 */
70    VLD1.8   {d3},[r0],lr
71    VMULL.U8 q3,d2,d12                  /* out row 1 accumulates in q3 */
72    VLD1.8   {d16},[r0],r10
73    VMLAL.U8 q2,d1,d13
74    VLD1.8   {d17},[r0],lr
75    VMULL.U8 q11,d16,d12                /* out row 2 accumulates in q11 */
76    VMLAL.U8 q3,d3,d13
77    VLD1.8   {d18},[r0],r10
78    VMLAL.U8 q2,d2,d14
79    VMLAL.U8 q11,d17,d13
80    VMULL.U8 q12,d18,d12                /* out row 3 accumulates in q12 */
81    VLD1.8   {d19},[r0],lr
82    VMLAL.U8 q3,d16,d14
83    VLD1.8   {d0},[r0],r10              /* d0/d1 reloaded: lower row for out row 3, current row for the next pass */
84    VMLAL.U8 q12,d19,d13
85    VMLAL.U8 q11,d18,d14
86    VMLAL.U8 q2,d3,d15
87    VLD1.8   {d1},[r0],lr
88    VMLAL.U8 q12,d0,d14
89    VMLAL.U8 q3,d17,d15
90    VMLAL.U8 q11,d19,d15
91    SUBS     r5,r5,#4                   /* four rows done; flags are consumed by the BGT below */
92    VMLAL.U8 q12,d1,d15
93    VQRSHRN.U16 d8,q2,#6                /* round, shift right by 6, saturate and narrow to 8 bits */
94    VQRSHRN.U16 d9,q3,#6
95    VQRSHRN.U16 d20,q11,#6
96    VST1.64  {d8},[r2],r3
97    VQRSHRN.U16 d21,q12,#6
98    VST1.64  {d9},[r2],r3
99    VST1.64  {d20},[r2],r3
100    VST1.64  {d21},[r2],r3
101    BGT      WidthIs8MVIsNotZero
102    MOV      r0,#0                      /* return value 0 */
103    VPOP     {d8-d15}
104    POP      {r4-r12,pc}
105
/*
 * Width 4, interpolating. Same invariant (d0/d1 = current row at x and x+1).
 * Each pass loads two more row pairs and emits two rows; only the low
 * 4 bytes of each narrowed result are stored.
 */
106WidthIs4MVIsNotZero:
107    VLD1.8   {d2},[r0],r10
108    VMULL.U8 q2,d0,d12                  /* uses the old d0 before it is reloaded below */
109    VMULL.U8 q3,d2,d12
110    VLD1.8   {d3},[r0],lr
111    VMLAL.U8 q2,d1,d13                  /* uses the old d1 before it is reloaded below */
112    VMLAL.U8 q3,d3,d13
113    VLD1.8   {d0},[r0],r10              /* d0/d1 become the lower row for q3 and the current row for the next pass */
114    VMLAL.U8 q2,d2,d14
115    VMLAL.U8 q3,d0,d14
116    VLD1.8   {d1},[r0],lr
117    SUBS     r5,r5,#2
118    VMLAL.U8 q3,d1,d15
119    VMLAL.U8 q2,d3,d15
120    VQRSHRN.U16 d9,q3,#6
121    VQRSHRN.U16 d8,q2,#6
122    VST1.32  {d8[0]},[r2],r3            /* store lane 0 only: 4 bytes */
123    VST1.32  {d9[0]},[r2],r3
124    BGT      WidthIs4MVIsNotZero
125    MOV      r0,#0                      /* return value 0 */
126    VPOP     {d8-d15}
127    POP      {r4-r12,pc}
128
/*
 * Width 2, interpolating. Identical arithmetic to the width-4 loop; only
 * the low 2 bytes of each narrowed result are stored.
 */
129WidthIs2MVIsNotZero:
130    VLD1.8   {d2},[r0],r10
131    VMULL.U8 q2,d0,d12
132    VMULL.U8 q3,d2,d12
133    VLD1.8   {d3},[r0],lr
134    VMLAL.U8 q2,d1,d13
135    VMLAL.U8 q3,d3,d13
136    VLD1.8   {d0},[r0],r10
137    VMLAL.U8 q2,d2,d14
138    VMLAL.U8 q3,d0,d14
139    VLD1.8   {d1},[r0],lr
140    SUBS     r5,r5,#2
141    VMLAL.U8 q3,d1,d15
142    VMLAL.U8 q2,d3,d15
143    VQRSHRN.U16 d9,q3,#6
144    VQRSHRN.U16 d8,q2,#6
145    VST1.16  {d8[0]},[r2],r3            /* store lane 0 only: 2 bytes */
146    VST1.16  {d9[0]},[r2],r3
147    BGT      WidthIs2MVIsNotZero
148    MOV      r0,#0                      /* return value 0 */
149    VPOP     {d8-d15}
150    POP      {r4-r12,pc}
151
/*
 * Width 8, copy only. The shared preamble advanced r0 by 1 + (stride - 1),
 * i.e. one full row, so step back to the first row and copy two 8-byte
 * rows per pass.
 */
152WidthIs8MVIsZero:
153    SUB      r0,r0,r1                   /* undo the preamble's one-row advance */
154WidthIs8LoopMVIsZero:
155    VLD1.8   {d0},[r0],r1
156    SUBS     r5,r5,#2
157    VLD1.8   {d1},[r0],r1
158    VST1.64  {d0},[r2],r3
159    VST1.64  {d1},[r2],r3
160    BGT      WidthIs8LoopMVIsZero
161    MOV      r0,#0                      /* return value 0 */
162    VPOP     {d8-d15}
163    POP      {r4-r12,pc}
164
/*
 * Width 4, copy only. d0 already holds the first row from the preamble and
 * r0 points at the second row. Each pass stores d0, then the row just
 * loaded into d1, and preloads d0 for the next pass. The preload made in
 * the final pass is never stored.
 */
165WidthIs4MVIsZero:
166    VLD1.8   {d1},[r0],r1
167    SUBS     r5,r5,#2
168    VST1.32  {d0[0]},[r2],r3            /* store lane 0 only: 4 bytes */
169    VLD1.8   {d0},[r0],r1               /* preload the first row of the next pass */
170    VST1.32  {d1[0]},[r2],r3
171    BGT      WidthIs4MVIsZero
172    MOV      r0,#0                      /* return value 0 */
173    VPOP     {d8-d15}
174    POP      {r4-r12,pc}
175
/*
 * Width 2, copy only. Same scheme as the width-4 copy loop, storing
 * 2 bytes per row.
 */
176WidthIs2MVIsZero:
177    VLD1.8   {d1},[r0],r1
178    SUBS     r5,r5,#2
179    VST1.16  {d0[0]},[r2],r3            /* store lane 0 only: 2 bytes */
180    VLD1.8   {d0},[r0],r1               /* preload the first row of the next pass */
181    VST1.16  {d1[0]},[r2],r3
182    BGT      WidthIs2MVIsZero
183    MOV      r0,#0                      /* return value 0 */
184    VPOP     {d8-d15}
185    POP      {r4-r12,pc}
186    .endfunc
187
188    .end
189
190