armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe_s.s revision 0c1bc742181ded4930842b46e9507372f0b1b963
1;//
2;//
3;// File Name:  armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe_s.s
4;// OpenMAX DL: v1.0.2
5;// Revision:   12290
6;// Date:       Wednesday, April 9, 2008
7;//
8;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
9;//
10;//
11;//
12
13        INCLUDE omxtypes_s.h
14        INCLUDE armCOMM_s.h
15
16        EXPORT armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
17
18        M_VARIANTS CortexA8
19
20    IF CortexA8
21        M_START armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe, r11
;//------------------------------------------------------------------------
;// armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
;//
;// Purpose (as implemented below):
;//   Applies a 6-tap filter with taps (1, -5, 20, 20, -5, 1) first down
;//   the columns of nine source rows (a..i) and then along each of the
;//   four resulting rows (P, Q, R, S), producing a 4x4 block of 8-bit
;//   results.  Each result is the 32-bit filter sum, rounded and shifted
;//   right by 10, then saturated to 0..255.
;//
;// Inputs:
;//   pSrc    (r0)  address of source row a; nine rows are read from here
;//   srcStep (r1)  value added to the row address after every row load
;//   pDst    (r2)  declared but not referenced in this routine
;//   dstStep (r3)  declared but not referenced in this routine
;//
;// Outputs (no store instruction appears in this routine):
;//   dAcc0, dAcc1, dAcc2, dAcc3 (d0, d2, d4, d6) = result rows 0..3.
;//   Lanes 0..3 of each register hold the four results of that row;
;//   lanes 4..7 are narrowed from leftover accumulator data and carry no
;//   filter result.
;//
;// Register effects visible in this code:
;//   pSrc is post-incremented five times (rows a..e); r12 is used as a
;//   second row pointer (rows f..i); q0-q15 are all written.
;//   NOTE(review): d8-d15 are written here without an explicit save in
;//   this routine; whether M_START/M_END or the caller preserves them is
;//   not visible from this file - confirm against armCOMM_s.h / callers.
;//
;// Each row load fetches 16 bytes, but only columns 0..8 of the vertical
;// results feed the horizontal filter.
;//------------------------------------------------------------------------
22
23;// Declare input registers
24pSrc            RN 0
25srcStep         RN 1
26pDst            RN 2
27dstStep         RN 3
28
29;// Declare Neon registers
;// Filter coefficients.  d30/d31 are first used with 8-bit lanes for the
;// vertical pass and later reloaded with 16-bit lanes for the horizontal pass.
30dTCoeff5        DN 30.U8
31dTCoeff20       DN 31.U8
32dCoeff5         DN 30.S16
33dCoeff20        DN 31.S16
34
;// Source rows a..i, one 16-byte row per Q register (q0..q8).
35qSrcA01         QN 0.U8
36qSrcB23         QN 1.U8
37qSrcC45         QN 2.U8
38qSrcD67         QN 3.U8
39qSrcE89         QN 4.U8
40qSrcF1011       QN 5.U8
41qSrcG1213       QN 6.U8
42qSrcH1415       QN 7.U8
43qSrcI1617       QN 8.U8
44
;// Low halves of the source rows (columns 0..7).
45dSrcA0          DN 0.U8
46dSrcB2          DN 2.U8
47dSrcC4          DN 4.U8
48dSrcD6          DN 6.U8
49dSrcE8          DN 8.U8
50dSrcF10         DN 10.U8
51dSrcG12         DN 12.U8
52dSrcH14         DN 14.U8
53dSrcI16         DN 16.U8
54
;// High halves of the source rows (columns 8..15).
55dSrcA1          DN 1.U8
56dSrcB3          DN 3.U8
57dSrcC5          DN 5.U8
58dSrcD7          DN 7.U8
59dSrcE9          DN 9.U8
60dSrcF11         DN 11.U8
61dSrcG13         DN 13.U8
62dSrcH15         DN 15.U8
63dSrcI17         DN 17.U8
64
;// Vertical filter results for columns 0..7 of output rows P, Q, R, S.
65qTempP01        QN 9.S16
66qTempQ01        QN 10.S16
67qTempR01        QN 11.S16
68qTempS01        QN 12.S16
69
;// Vertical filter results for columns 8..15.  These share q0..q3 with
;// source rows a..d, so each source row is destroyed when the matching
;// result is first written.
70qTempP23        QN 0.S16
71qTempQ23        QN 1.S16
72qTempR23        QN 2.S16
73qTempS23        QN 3.S16
74
;// 4-lane views of the vertical results: X0 = columns 0..3,
;// X1 = columns 4..7, X2 = columns 8..11 (low half of qTempX23).
75dTempP0         DN 18.S16
76dTempP1         DN 19.S16
77dTempP2         DN 0.S16
78
79dTempQ0         DN 20.S16
80dTempQ1         DN 21.S16
81dTempQ2         DN 2.S16
82
83dTempR0         DN 22.S16
84dTempR1         DN 23.S16
85dTempR2         DN 4.S16
86
87dTempS0         DN 24.S16
88dTempS1         DN 25.S16
89dTempS2         DN 6.S16
90
;// Scratch vectors holding shifted windows of a vertical-result row.
91dTempB0         DN 26.S16
92dTempC0         DN 27.S16
93dTempD0         DN 28.S16
94dTempF0         DN 29.S16
95
;// 16-bit narrowed results (same registers as the final 8-bit outputs).
96dTempAcc0       DN 0.U16
97dTempAcc1       DN 2.U16
98dTempAcc2       DN 4.U16
99dTempAcc3       DN 6.U16
100
;// Final 8-bit output rows.
101dAcc0           DN 0.U8
102dAcc1           DN 2.U8
103dAcc2           DN 4.U8
104dAcc3           DN 6.U8
105
;// 32-bit horizontal accumulators; they overwrite q0..q3 (and therefore
;// dTempP2/Q2/R2/S2) once the horizontal pass for that row starts.
106qAcc0           QN 0.S32
107qAcc1           QN 1.S32
108qAcc2           QN 2.S32
109qAcc3           QN 3.S32
110
;// 16-bit views of q0..q3 used as the source of the final narrowing.
111qTAcc0          QN 0.U16
112qTAcc1          QN 1.U16
113qTAcc2          QN 2.U16
114qTAcc3          QN 3.U16
115
;// Extra scratch in d8 (shares storage with dSrcE8); qTmp itself is not
;// referenced by any instruction below.
116qTmp            QN 4.S16
117dTmp            DN 8.S16
118
;// ---- Row loads interleaved with the vertical 6-tap filter --------------
;// For every column:  X = r0 + r5 + 20*(r2 + r3) - 5*(r1 + r4)
;// with (r0..r5) = (a..f) for P, (b..g) for Q, (c..h) for R, (d..i) for S.
119        VLD1        qSrcA01, [pSrc], srcStep                 ;// [a0 a1 a2 a3 .. a15]
120        ADD         r12, pSrc, srcStep, LSL #2               ;// r12 = row b address + 4*srcStep = row f
121        VMOV        dTCoeff5, #5                             ;// 8-bit lanes = 5
122        VMOV        dTCoeff20, #20                           ;// 8-bit lanes = 20
123        VLD1        qSrcF1011, [r12], srcStep                ;// [f0 f1 f2 f3 .. f15]
124        VLD1        qSrcB23, [pSrc], srcStep                 ;// [b0 b1 b2 b3 .. b15]
125
126        VLD1        qSrcG1213, [r12], srcStep                ;// [g0 g1 g2 g3 .. g15]
127        VADDL       qTempP01, dSrcA0, dSrcF10                ;// P[0..7]   = a + f
128        VLD1        qSrcC45, [pSrc], srcStep                 ;// [c0 c1 c2 c3 .. c15]
129        VADDL       qTempP23, dSrcA1, dSrcF11                ;// P[8..15]  = a + f (row a in q0 is overwritten)
130        VLD1        qSrcD67, [pSrc], srcStep                 ;// [d0 d1 d2 d3 .. d15]
131        VADDL       qTempQ01, dSrcB2, dSrcG12                ;// Q[0..7]   = b + g
132        VLD1        qSrcE89, [pSrc], srcStep                 ;// [e0 e1 e2 e3 .. e15]
133
134        ;//t0
135        VMLAL       qTempP01, dSrcC4, dTCoeff20              ;// P[0..7]  += 20*c
136
137        VLD1        qSrcH1415, [r12], srcStep                ;// [h0 h1 h2 h3 .. h15]
138
139        VMLAL       qTempP23, dSrcC5, dTCoeff20              ;// P[8..15] += 20*c
140
141        VLD1        qSrcI1617, [r12], srcStep                 ;// [i0 i1 i2 i3 .. ]
142
143        VMLAL       qTempP01, dSrcD6, dTCoeff20              ;// P[0..7]  += 20*d
144        VMLAL       qTempQ01, dSrcD6, dTCoeff20              ;// Q[0..7]  += 20*d
145        VMLSL       qTempP23, dSrcB3, dTCoeff5               ;// P[8..15] -= 5*b
146
147        VADDL       qTempR01, dSrcC4, dSrcH14                ;// R[0..7]   = c + h
148
149        VMLSL       qTempP01, dSrcB2, dTCoeff5               ;// P[0..7]  -= 5*b
150
151        VADDL       qTempQ23, dSrcB3, dSrcG13                ;// Q[8..15]  = b + g (row b in q1 is overwritten)
152
153        VMLAL       qTempP23, dSrcD7, dTCoeff20              ;// P[8..15] += 20*d
154        VMLAL       qTempQ01, dSrcE8, dTCoeff20              ;// Q[0..7]  += 20*e
155
156        VMLSL       qTempP01, dSrcE8, dTCoeff5               ;// P[0..7]  -= 5*e   (P[0..7] complete)
157        VMLAL       qTempQ23, dSrcD7, dTCoeff20              ;// Q[8..15] += 20*d
158
159        VMLSL       qTempP23, dSrcE9, dTCoeff5               ;// P[8..15] -= 5*e   (P[8..15] complete)
160
161        ;//t1
162
163        VMLAL       qTempR01, dSrcE8, dTCoeff20              ;// R[0..7]  += 20*e
164        VMLSL       qTempQ01, dSrcC4, dTCoeff5               ;// Q[0..7]  -= 5*c
165        VMLSL       qTempQ23, dSrcC5, dTCoeff5               ;// Q[8..15] -= 5*c
166        VADDL       qTempR23, dSrcC5, dSrcH15                ;// R[8..15]  = c + h (row c in q2 is overwritten)
167
168        VMLAL       qTempR01, dSrcF10, dTCoeff20             ;// R[0..7]  += 20*f
169        VMLSL       qTempQ01, dSrcF10, dTCoeff5              ;// Q[0..7]  -= 5*f   (Q[0..7] complete)
170        VMLAL       qTempQ23, dSrcE9, dTCoeff20              ;// Q[8..15] += 20*e
171        VMLAL       qTempR23, dSrcE9, dTCoeff20              ;// R[8..15] += 20*e
172        VADDL       qTempS01, dSrcD6, dSrcI16                ;// S[0..7]   = d + i
173
174
175        VMLSL       qTempR01, dSrcD6, dTCoeff5               ;// R[0..7]  -= 5*d
176        VMLSL       qTempQ23, dSrcF11, dTCoeff5              ;// Q[8..15] -= 5*f   (Q[8..15] complete)
177        VMLSL       qTempR23, dSrcD7, dTCoeff5               ;// R[8..15] -= 5*d
178
179        ;//t2
180        VADDL       qTempS23, dSrcD7, dSrcI17                ;// S[8..15]  = d + i (row d in q3 is overwritten)
181        VMLAL       qTempS01, dSrcF10, dTCoeff20             ;// S[0..7]  += 20*f
182        VMLSL       qTempR01, dSrcG12, dTCoeff5              ;// R[0..7]  -= 5*g   (R[0..7] complete)
183        VMLSL       qTempR23, dSrcG13, dTCoeff5              ;// R[8..15] -= 5*g
184
185        VMLAL       qTempS23, dSrcF11, dTCoeff20             ;// S[8..15] += 20*f
186        VMLAL       qTempS01, dSrcG12, dTCoeff20             ;// S[0..7]  += 20*g
187        VEXT        dTempB0, dTempP0, dTempP1, #1            ;// B = [P1 P2 P3 P4]
188        VMLAL       qTempR23, dSrcF11, dTCoeff20             ;// R[8..15] += 20*f  (R[8..15] complete)
189
190
191        ;//t3
192        VMLAL       qTempS23, dSrcG13, dTCoeff20             ;// S[8..15] += 20*g  (last 8-bit use of d31)
193        VMLSL       qTempS01, dSrcE8, dTCoeff5               ;// S[0..7]  -= 5*e
194        VEXT        dTempC0, dTempP0, dTempP1, #2            ;// C = [P2 P3 P4 P5]
195        VMOV        dCoeff20, #20                            ;// d31 reloaded: 16-bit lanes = 20
196        VMLSL       qTempS23, dSrcE9, dTCoeff5               ;// S[8..15] -= 5*e
197        VMLSL       qTempS01, dSrcH14, dTCoeff5              ;// S[0..7]  -= 5*h   (S[0..7] complete)
198        VEXT        dTempF0, dTempP1, dTempP2, #1            ;// F = [P5 P6 P7 P8]
199        VEXT        dTempD0, dTempP0, dTempP1, #3            ;// D = [P3 P4 P5 P6]
200        VMLSL       qTempS23, dSrcH15, dTCoeff5              ;// S[8..15] -= 5*h   (S[8..15] complete; last 8-bit use of d30)
201
;// ---- Horizontal 6-tap filter on each vertical-result row X -------------
;// For i = 0..3:  acc[i] = X[i] + X[i+5] + 20*(X[i+2] + X[i+3]) - 5*(X[i+1] + X[i+4])
;// The work for consecutive rows is interleaved below.
202        VADDL       qAcc0, dTempP0, dTempF0                  ;// acc0 = P[i] + P[i+5] (q0 reused; dTempP2 no longer needed)
203        VADD        dTempC0, dTempC0, dTempD0                ;// C = P[i+2] + P[i+3]
204        ;//h
205        VMOV        dCoeff5, #5                              ;// d30 reloaded: 16-bit lanes = 5
206
207        ;// res0
208        VADD        dTempB0, dTempB0, dTempP1                ;// B = P[i+1] + P[i+4]
209        VMLAL       qAcc0, dTempC0, dCoeff20                 ;// acc0 += 20*C
210        VEXT        dTempC0, dTempQ0, dTempQ1, #2            ;// C = [Q2 Q3 Q4 Q5]
211        VEXT        dTempD0, dTempQ0, dTempQ1, #3            ;// D = [Q3 Q4 Q5 Q6]
212        VEXT        dTempF0, dTempQ1, dTempQ2, #1            ;// F = [Q5 Q6 Q7 Q8]
213        VMLSL       qAcc0, dTempB0, dCoeff5                  ;// acc0 -= 5*B       (acc0 complete)
214
215        ;// res1
216        VEXT        dTempB0, dTempQ0, dTempQ1, #1            ;// B = [Q1 Q2 Q3 Q4]
217        VADDL       qAcc1, dTempQ0, dTempF0                  ;// acc1 = Q[i] + Q[i+5] (q1 reused; dTempQ2 no longer needed)
218        VADD        dTempC0, dTempC0, dTempD0                ;// C = Q[i+2] + Q[i+3]
219        VADD        dTempB0, dTempB0, dTempQ1                ;// B = Q[i+1] + Q[i+4]
220        VEXT        dTempD0, dTempR0, dTempR1, #3            ;// D = [R3 R4 R5 R6]
221        VMLAL       qAcc1, dTempC0, dCoeff20                 ;// acc1 += 20*C
222        VEXT        dTempF0, dTempR1, dTempR2, #1            ;// F = [R5 R6 R7 R8]
223        VEXT        dTempC0, dTempR0, dTempR1, #2            ;// C = [R2 R3 R4 R5]
224        VEXT        dTmp, dTempR0, dTempR1, #1               ;// dTmp = [R1 R2 R3 R4] (d8; dTempB0 is still pending for acc1)
225        VADDL       qAcc2, dTempR0, dTempF0                  ;// acc2 = R[i] + R[i+5] (q2 reused; dTempR2 no longer needed)
226        VMLSL       qAcc1, dTempB0, dCoeff5                  ;// acc1 -= 5*B       (acc1 complete)
227;        VEXT        dTempB0, dTempR0, dTempR1, #1           ;// disabled: the same window is built in dTmp above
228        VADD        dTempC0, dTempC0, dTempD0                ;// C = R[i+2] + R[i+3]
229
230        ;// res2
231        VADD        dTempB0, dTmp, dTempR1                   ;// B = R[i+1] + R[i+4]
232        VEXT        dTempD0, dTempS0, dTempS1, #3            ;// D = [S3 S4 S5 S6]
233        VMLAL       qAcc2, dTempC0, dCoeff20                 ;// acc2 += 20*C
234;        VADD        dTempB0, dTempB0, dTempR1               ;// disabled: replaced by the VADD from dTmp above
235
236        ;// res3
237        VEXT        dTempC0, dTempS0, dTempS1, #2            ;// C = [S2 S3 S4 S5]
238        VEXT        dTempF0, dTempS1, dTempS2, #1            ;// F = [S5 S6 S7 S8]
239        VADD        dTempC0, dTempC0, dTempD0                ;// C = S[i+2] + S[i+3]
240        VEXT        dTmp, dTempS0, dTempS1, #1               ;// dTmp = [S1 S2 S3 S4]
241        VADDL       qAcc3, dTempS0, dTempF0                  ;// acc3 = S[i] + S[i+5] (q3 reused; dTempS2 no longer needed)
242        VMLSL       qAcc2, dTempB0, dCoeff5                  ;// acc2 -= 5*B       (acc2 complete)
243        VMLAL       qAcc3, dTempC0, dCoeff20                 ;// acc3 += 20*C
244        VADD        dTmp, dTmp, dTempS1                      ;// dTmp = S[i+1] + S[i+4]
245        VMLSL       qAcc3, dTmp, dCoeff5                     ;// acc3 -= 5*dTmp    (acc3 complete)
246
;// ---- Round, scale and clamp --------------------------------------------
;// Signed 32-bit sums -> rounded shift right by 10 with unsigned
;// saturation to 16 bits (negative sums clamp to 0).
247        VQRSHRUN    dTempAcc0, qAcc0, #10
248        VQRSHRUN    dTempAcc1, qAcc1, #10
249        VQRSHRUN    dTempAcc2, qAcc2, #10
250        VQRSHRUN    dTempAcc3, qAcc3, #10
251
;// Unsigned 16 -> 8 bit saturating narrow (values above 255 clamp to 255).
;// Lanes 0..3 of each dAccN are the results; lanes 4..7 come from the
;// untouched upper half of the old accumulator and are not results.
252        VQMOVN      dAcc0, qTAcc0
253        VQMOVN      dAcc1, qTAcc1
254        VQMOVN      dAcc2, qTAcc2
255        VQMOVN      dAcc3, qTAcc3
256
257        M_END
258
259    ENDIF
260
261
262
263
264
265    END
266
267