armVCM4P10_InterpolateLuma_Align_unsafe_s.s revision 78e52bfac041d71ce53b5b13c2abf78af742b09d
1;//
2;// Copyright (C) 2007-2008 ARM Limited
3;//
4;// Licensed under the Apache License, Version 2.0 (the "License");
5;// you may not use this file except in compliance with the License.
6;// You may obtain a copy of the License at
7;//
8;//      http://www.apache.org/licenses/LICENSE-2.0
9;//
10;// Unless required by applicable law or agreed to in writing, software
11;// distributed under the License is distributed on an "AS IS" BASIS,
12;// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13;// See the License for the specific language governing permissions and
14;// limitations under the License.
15;//
16;//
17;//
18;// File Name:  armVCM4P10_InterpolateLuma_Align_unsafe_s.s
19;// OpenMAX DL: v1.0.2
20;// Revision:   9641
21;// Date:       Thursday, February 7, 2008
22;//
23;//
24;//
25;//
26
27        INCLUDE omxtypes_s.h
28        INCLUDE armCOMM_s.h
29        ;// M_VARIANTS comes from armCOMM_s.h; presumably it declares which CPU variants this file implements - confirm
30        M_VARIANTS ARM1136JS
31
32        EXPORT armVCM4P10_InterpolateLuma_HorAlign9x_unsafe     ;// Both routines are made visible to other modules
33        EXPORT armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
34
35DEBUG_ON    SETL {FALSE}                                ;// Assembly-time logical variable; not referenced again in this file
36
37    IF ARM1136JS                                        ;// Conditional assembly: closed by the ENDIF at the end of the file
38
39;// Declare input registers (shared by both routines in this file)
40pSrc            RN 0                                    ;// r0: source pointer on entry, result pointer on exit
41srcStep         RN 1                                    ;// r1: source row step on entry, result row step on exit
42pDst            RN 8                                    ;// r8: destination pointer, advanced as rows are stored
43iHeight         RN 9                                    ;// r9: remaining row count, decremented once per row
44
45;// Declare inner loop registers
46x               RN 7                                    ;// r7: pSrc & 3, used only to select the copy loop
47x0              RN 7                                    ;// r7 again: x is dead after the dispatch, so the first data word reuses it
48x1              RN 10                                   ;// r10: second data word
49x2              RN 11                                   ;// r11: third data word (HorAlign9x only)
50Scratch         RN 12                                   ;// r12: holds the entry value of pDst (HorAlign9x only)
51
52;// Function:
53;//     armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
54;//
55;// Copies rows from an arbitrarily aligned source memory location (pSrc) to a 4 byte aligned
56;// destination pointed to by (pDst) for horizontal interpolation.
57;// Each row needs 9 source bytes; three whole words (12 bytes) are loaded, realigned and stored per row.
58;// The row loops test the counter after the store, so one row is copied even if r9 <= 0 on entry.
59;// Registers used as input for this function
60;// r0 (pSrc), r1 (srcStep), r8 (aligned destination pointer), r9 (number of rows to copy)
61;// NOTE(review): pSrc & 3 is evaluated once, so srcStep is presumably a multiple of 4 - confirm with callers
62;// Registers preserved for top level function
63;// r2,r3,r4,r5,r6
64;//
65;// Registers modified by the function
66;// r7,r8,r9,r10,r11,r12 (and the condition flags, through SUBS)
67;//
68;// Output registers
69;// r0 - the value r8 (pDst) had on entry, i.e. the start of the aligned copy, to be used as the new pSrc
70;// r1 - 12, the byte step between rows of the aligned copy
71
72        ;// Function header (M_START / M_END are macros from armCOMM_s.h)
73        M_START armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
74
75        ;// Remember where the destination buffer starts; it is handed back in r0 at CopyEnd
76        MOV     Scratch, pDst
77
78StartAlignedStackCopy
79        AND     x, pSrc, #3                             ;// x = byte offset of pSrc inside its word (0..3)
80        BIC     pSrc, pSrc, #3                          ;// Round pSrc down to a word boundary
81        ;// Dispatch on x; the cases are presumably taken in order for x = 0,1,2,3 (macros from armCOMM_s.h - confirm)
82        M_SWITCH x
83        M_CASE   Copy0toAligned
84        M_CASE   Copy1toAligned
85        M_CASE   Copy2toAligned
86        M_CASE   Copy3toAligned
87        M_ENDSWITCH
88
89Copy0toAligned                                          ;// No realignment: the three words are stored as loaded
90        LDM     pSrc, {x0, x1, x2}                      ;// Load three consecutive words of this row
91        SUBS    iHeight, iHeight, #1                    ;// Count the row; no later instruction here sets flags before BGT
92        ADD     pSrc, pSrc, srcStep                     ;// Step to the next source row
93
94        ;// One cycle stall
95
96        STM     pDst!, {x0, x1, x2}                     ;// Store aligned output row, pDst += 12
97        BGT     Copy0toAligned                          ;// Repeat while rows remain
98        B       CopyEnd
99
100Copy1toAligned                                          ;// Shift the 12 loaded bytes down by one byte (little-endian byte order assumed - confirm)
101        LDM     pSrc, {x0, x1, x2}                      ;// Load three consecutive words of this row
102        SUBS    iHeight, iHeight, #1                    ;// Count the row; flags are consumed by BGT below
103        ADD     pSrc, pSrc, srcStep                     ;// Step to the next source row
104
105        ;// One cycle stall
106
107        MOV     x0, x0, LSR #8                          ;// Word 0: discard its lowest byte
108        ORR     x0, x0, x1, LSL #24                     ;//   and take the lowest byte of word 1 as its top byte
109        MOV     x1, x1, LSR #8                          ;// Word 1: discard its lowest byte
110        ORR     x1, x1, x2, LSL #24                     ;//   and take the lowest byte of word 2 as its top byte
111        MOV     x2, x2, LSR #8                          ;// Word 2: upper three bytes, top byte becomes zero
112        STM     pDst!, {x0, x1, x2}                     ;// Store aligned output row, pDst += 12
113        BGT     Copy1toAligned                          ;// Repeat while rows remain
114        B       CopyEnd
115
116Copy2toAligned                                          ;// Shift the 12 loaded bytes down by two bytes
117        LDM     pSrc, {x0, x1, x2}                      ;// Load three consecutive words of this row
118        SUBS    iHeight, iHeight, #1                    ;// Count the row; flags are consumed by BGT below
119        ADD     pSrc, pSrc, srcStep                     ;// Step to the next source row
120
121        ;// One cycle stall
122
123        MOV     x0, x0, LSR #16                         ;// Word 0: keep its upper halfword
124        ORR     x0, x0, x1, LSL #16                     ;//   and put the lower halfword of word 1 above it
125        MOV     x1, x1, LSR #16                         ;// Word 1: keep its upper halfword
126        ORR     x1, x1, x2, LSL #16                     ;//   and put the lower halfword of word 2 above it
127        MOV     x2, x2, LSR #16                         ;// Word 2: upper halfword, top halfword becomes zero
128        STM     pDst!, {x0, x1, x2}                     ;// Store aligned output row, pDst += 12
129        BGT     Copy2toAligned                          ;// Repeat while rows remain
130        B       CopyEnd
131
132Copy3toAligned                                          ;// Shift the 12 loaded bytes down by three bytes
133        LDM     pSrc, {x0, x1, x2}                      ;// Load three consecutive words of this row
134        SUBS    iHeight, iHeight, #1                    ;// Count the row; flags are consumed by BGT below
135        ADD     pSrc, pSrc, srcStep                     ;// Step to the next source row
136
137        ;// One cycle stall
138
139        MOV     x0, x0, LSR #24                         ;// Word 0: keep only its top byte
140        ORR     x0, x0, x1, LSL #8                      ;//   and put the lower three bytes of word 1 above it
141        MOV     x1, x1, LSR #24                         ;// Word 1: keep only its top byte
142        ORR     x1, x1, x2, LSL #8                      ;//   and put the lower three bytes of word 2 above it
143        MOV     x2, x2, LSR #24                         ;// Word 2: top byte only, upper three bytes become zero
144        STM     pDst!, {x0, x1, x2}                     ;// Store aligned output row, pDst += 12
145        BGT     Copy3toAligned                          ;// Repeat while rows remain, then fall through to CopyEnd
146
147CopyEnd
148
149        MOV     pSrc, Scratch                           ;// Return the start of the aligned copy (pDst as it was on entry)
150        MOV     srcStep, #12                            ;// Rows of the copy are 12 bytes apart (three words per STM)
151
152        M_END
153
154
155;// Function:
156;//     armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
157;//
158;// Copies rows from an arbitrarily aligned source memory location (pSrc) to an aligned
159;// destination pointed to by (pDst) for vertical interpolation.
160;// Each row contributes 4 bytes, stored as a single word, so rows of the copy are 4 bytes apart.
161;// The row loops test the counter after the store, so one row is copied even if r9 <= 0 on entry.
162;// Registers used as input for this function
163;// r0 (pSrc), r1 (srcStep), r8 (aligned destination pointer), r9 (number of rows to copy)
164;// NOTE(review): pSrc & 3 is evaluated once, so srcStep is presumably a multiple of 4 - confirm with callers
165;// Registers preserved for top level function
166;// r2,r3,r4,r5,r6
167;//
168;// Registers modified by the function
169;// r7,r8,r9,r10,r11,r12 (r11 and r12 are not named by the instructions written out below; macro expansions not checked)
170;//
171;// Output registers
172;// r0 - final pDst minus 28, i.e. entry pDst + 4*rows - 28 (the third stored row if 9 rows are copied - row count comes from the caller, confirm)
173;// r1 - 4, the byte step between rows of the aligned copy
174
175        ;// Function header (M_START / M_END are macros from armCOMM_s.h)
176        M_START armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
177
178        ;// Copy pSrc rows to the buffer at pDst. M_LDR (armCOMM_s.h) presumably loads [pSrc] and then adds srcStep to pSrc - confirm
179StartVAlignedStackCopy
180        AND     x, pSrc, #3                             ;// x = byte offset of pSrc inside its word (0..3)
181        BIC     pSrc, pSrc, #3                          ;// Round pSrc down to a word boundary
182
183        ;// Dispatch on x; the cases are presumably taken in order for x = 0,1,2,3 (macros from armCOMM_s.h - confirm)
184        M_SWITCH x
185        M_CASE   Copy0toVAligned
186        M_CASE   Copy1toVAligned
187        M_CASE   Copy2toVAligned
188        M_CASE   Copy3toVAligned
189        M_ENDSWITCH
190
191Copy0toVAligned                                         ;// No realignment: one word per row is stored as loaded
192        M_LDR   x0, [pSrc], srcStep
193        SUBS    iHeight, iHeight, #1                    ;// Count the row; flags are consumed by BGT below
194
195        ;// One cycle stall
196
197        STR     x0, [pDst], #4                              ;// Store aligned output row, pDst += 4
198        BGT     Copy0toVAligned                         ;// Repeat while rows remain
199        B       CopyVEnd
200
201Copy1toVAligned                                         ;// Row starts one byte into the word (little-endian byte order assumed - confirm)
202        LDR     x1, [pSrc, #4]                          ;// Word following the aligned one; read before pSrc is stepped
203        M_LDR   x0, [pSrc], srcStep
204        SUBS    iHeight, iHeight, #1                    ;// Count the row; flags are consumed by BGT below
205
206        ;// One cycle stall
207
208        MOV     x1, x1, LSL #24                         ;// Lowest byte of the second word becomes the top byte
209        ORR     x0, x1, x0, LSR #8                      ;// Upper three bytes of the first word fill the rest
210        STR     x0, [pDst], #4                              ;// Store aligned output row, pDst += 4
211        BGT     Copy1toVAligned                         ;// Repeat while rows remain
212        B       CopyVEnd
213
214Copy2toVAligned                                         ;// Row starts two bytes into the word
215        LDR     x1, [pSrc, #4]                          ;// Word following the aligned one; read before pSrc is stepped
216        M_LDR   x0, [pSrc], srcStep
217        SUBS    iHeight, iHeight, #1                    ;// Count the row; flags are consumed by BGT below
218
219        ;// One cycle stall
220
221        MOV     x1, x1, LSL #16                         ;// Lower halfword of the second word becomes the upper halfword
222        ORR     x0, x1, x0, LSR #16                     ;// Upper halfword of the first word becomes the lower halfword
223        STR     x0, [pDst], #4                              ;// Store aligned output row, pDst += 4
224        BGT     Copy2toVAligned                         ;// Repeat while rows remain
225        B       CopyVEnd
226
227Copy3toVAligned                                         ;// Row starts three bytes into the word
228        LDR     x1, [pSrc, #4]                          ;// Word following the aligned one; read before pSrc is stepped
229        M_LDR   x0, [pSrc], srcStep
230        SUBS    iHeight, iHeight, #1                    ;// Count the row; flags are consumed by BGT below
231
232        ;// One cycle stall
233
234        MOV     x1, x1, LSL #8                          ;// Lower three bytes of the second word become the upper three
235        ORR     x0, x1, x0, LSR #24                     ;// Top byte of the first word becomes the lowest byte
236        STR     x0, [pDst], #4                              ;// Store aligned output row, pDst += 4
237        BGT     Copy3toVAligned                         ;// Repeat while rows remain, then fall through to CopyVEnd
238
239CopyVEnd
240
241        SUB     pSrc, pDst, #28                         ;// pDst is now one word past the last stored row; return the row 28 bytes (7 rows) before that point
242        MOV     srcStep, #4                             ;// Rows of the copy are 4 bytes apart (one word per STR)
243
244        M_END
245
246
246    ENDIF                                               ;// Closes IF ARM1136JS
247
248    END                                                 ;// End of source file
250
251