omxVCM4P10_InterpolateLuma_s.s revision 78e52bfac041d71ce53b5b13c2abf78af742b09d
1;//
2;// Copyright (C) 2007-2008 ARM Limited
3;//
4;// Licensed under the Apache License, Version 2.0 (the "License");
5;// you may not use this file except in compliance with the License.
6;// You may obtain a copy of the License at
7;//
8;//      http://www.apache.org/licenses/LICENSE-2.0
9;//
10;// Unless required by applicable law or agreed to in writing, software
11;// distributed under the License is distributed on an "AS IS" BASIS,
12;// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13;// See the License for the specific language governing permissions and
14;// limitations under the License.
15;//
16;//
17;//
18;// File Name:  omxVCM4P10_InterpolateLuma_s.s
19;// OpenMAX DL: v1.0.2
20;// Revision:   9641
21;// Date:       Thursday, February 7, 2008
22;//
23;//
24;//
25;//
26
27;// Function:
28;//     omxVCM4P10_InterpolateLuma
29;//
30;// This function implements omxVCM4P10_InterpolateLuma in v6 assembly.
31;// Performs quarter pel interpolation of inter luma MB.
32;// It's assumed that the frame is already padded when calling this function.
33;// Parameters:
34;// [in]    pSrc        Pointer to the source reference frame buffer
35;// [in]    srcStep     Reference frame step in byte
36;// [in]    dstStep     Destination frame step in byte. Must be multiple of roi.width
37;// [in]    dx          Fractional part of horizontal motion vector
38;//                         component in 1/4 pixel unit; valid in the range [0,3]
39;// [in]    dy          Fractional part of vertical motion vector
40;//                         component in 1/4 pixel unit; valid in the range [0,3]
41;// [in]    roi         Dimension of the interpolation region; the parameters roi.width and roi.height must
42;//                         be equal to either 4, 8, or 16.
43;// [out]   pDst        Pointer to the destination frame buffer.
44;//                   if roi.width==4,  4-byte alignment required
45;//                   if roi.width==8,  8-byte alignment required
46;//                   if roi.width==16, 16-byte alignment required
47;//
48;// Return Value:
49;// If the function runs without error, it returns OMX_Sts_NoErr.
50;// It is assumed that the following conditions are satisfied before calling this function:
51;//  pSrc and pDst are not NULL.
52;//  srcStep and dstStep are >= roi.width.
53;//     dx and dy are in the range [0-3].
54;//     roi.width and roi.height are each one of {4, 8, 16}.
55;//     If roi.width is equal to 4, pDst is 4 byte aligned.
56;//     If roi.width is equal to 8, pDst is 8 byte aligned.
57;//     If roi.width is equal to 16, pDst is 16 byte aligned.
58;//     srcStep and dstStep are multiples of 8.
59;//
60;//
61
62
63        INCLUDE omxtypes_s.h
64        INCLUDE armCOMM_s.h
65
66        M_VARIANTS ARM1136JS
67
68        EXPORT omxVCM4P10_InterpolateLuma
69
70    IF ARM1136JS
71        IMPORT armVCM4P10_InterpolateLuma_Copy4x4_unsafe
72        IMPORT armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
73        IMPORT armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
74        IMPORT armVCM4P10_Average_4x4_Align0_unsafe
75        IMPORT armVCM4P10_Average_4x4_Align2_unsafe
76        IMPORT armVCM4P10_Average_4x4_Align3_unsafe
77        IMPORT armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe
78        IMPORT armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe
79    ENDIF
80
81    IF ARM1136JS
82        IMPORT armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
83        IMPORT armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
84        IMPORT armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
85        IMPORT armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
86    ENDIF
87
88
89
90;// Declare input registers (r0-r3 arrive in registers; iHeight/iWidth are loaded from stack arguments after M_START)
91pSrc            RN 0                                    ;// r0: source pointer
92srcStep         RN 1                                    ;// r1: source step
93pDst            RN 2                                    ;// r2: destination pointer
94dstStep         RN 3                                    ;// r3: destination step
95iHeight         RN 4                                    ;// r4: loaded from ptrHeight, counted down by 4 per band of rows
96iWidth          RN 5                                    ;// r5: loaded from ptrWidth, counted down by 4 per block
97
98;// Declare other intermediate registers (the same five names are declared again, with the same registers, after M_START)
99idx             RN 6                                    ;// r6: loaded from ptridx
100idy             RN 7                                    ;// r7: loaded from ptridy
101index           RN 6                                    ;// r6 again: index = idx + 4*idy replaces idx
102Temp            RN 12                                   ;// r12: not referenced in the code shown here
103pArgs           RN 11                                   ;// r11: address of the ppArgs save area
104
105
106        ;// End of CortexA8
107
108;//-------------------------------------------------------------------------------------------------------------------------
109;//-------------------------------------------------------------------------------------------------------------------------
110    IF ARM1136JS
111
112
113        M_ALLOC4 ppDst, 8                               ;// not referenced again in the code shown here
114        M_ALLOC4 ppSrc, 8                               ;// not referenced again in the code shown here
115        M_ALLOC4 ppArgs, 16                             ;// 16 = 4 words: save area for {pSrc, srcStep, pDst, dstStep}
116        M_ALLOC4 pBuffer, 120                           ;// 120 = 12x10; default buffer addressed through pBuf
117        M_ALLOC8 pInterBuf, 120                         ;// 120 = 12*5*2; pBuf is switched to this buffer in cases 6, 9, a, b, e
118        M_ALLOC8 pTempBuf, 32                           ;// 32 =  8*4; temporary buffer used by cases 5, 6, 7, 9, b, d, e, f
119        ;// The locals above use M_ALLOC4/M_ALLOC8 from the included headers (presumably 4-/8-byte aligned stack storage - confirm).
120        ;// Function header
121        ;// Interpolation of luma is implemented by processing one 4x4 block of pixels at a time (inner loop over width, outer loop over height).
122        ;// Depending on the motion vector fractional parts (dx,dy), one out of 16 cases is processed; index = dx + 4*dy selects it.
123        ;// Registers r4, r5, r6 are to be preserved by the internal unsafe functions:
124        ;// r4 - iHeight (rows still to be processed)
125        ;// r5 - iWidth  (columns still to be processed in the current band of 4 rows)
126        ;// r6 - index   (case selector, computed once and reused for every block)
127        M_START omxVCM4P10_InterpolateLuma, r11         ;// r0-r3 = pSrc, srcStep, pDst, dstStep; r11 is handed to the prologue macro (presumably the last register to save - confirm)
128
129;// Declare other intermediate registers
130idx             RN 6                                    ;// dx; only needed until index is computed
131idy             RN 7                                    ;// dy; r7 is later loaded with the address of pTempBuf before the DiagCopy helper calls
132index           RN 6                                    ;// shares r6 with idx
133Temp            RN 12                                   ;// not referenced in the code shown here
134pArgs           RN 11                                   ;// address of ppArgs
135
136pBuf            RN 8                                    ;// address of the buffer in use (pBuffer or pInterBuf)
137Height          RN 9                                    ;// set to 4 or 9 before the alignment helpers are called
138bufStep         RN 9                                    ;// second name for r9; not referenced in the code shown here
139
140        ;// Define stack arguments
141        M_ARG   ptridx, 4                               ;// dx
142        M_ARG   ptridy, 4                               ;// dy
143        M_ARG   ptrWidth, 4                             ;// roi.width
144        M_ARG   ptrHeight, 4                            ;// roi.height
145
146        ;// Load dx, dy and the structure elements of roi
147        M_LDR   idx, ptridx
148        M_LDR   idy, ptridy
149        M_LDR   iWidth, ptrWidth
150        M_LDR   iHeight, ptrHeight
151        ;// Trace of the region size (M_PRINTF is a macro from the included headers; presumably debug-only - confirm)
152        M_PRINTF "roi.width %d\n", iWidth
153        M_PRINTF "roi.height %d\n", iHeight
154
155        ADD     index, idx, idy, LSL #2                 ;//  [index] = [idy][idx], i.e. index = idx + 4*idy (0..15 for dx, dy in [0,3])
156        M_ADR   pArgs, ppArgs                           ;// pArgs -> save area for r0-r3
157
158InterpolateLuma
159Block4x4WidthLoop
160Block4x4HeightLoop
161        ;// All three labels mark the same address: top of the per-4x4-block body. r0-r3 are saved here and reloaded at Block4x4LoopEnd.
162        STM     pArgs, {pSrc,srcStep,pDst,dstStep}
163        M_ADR   pBuf, pBuffer                           ;// default buffer; several cases point pBuf at pInterBuf instead
164        ;// The letters in the per-case comments (G, a, b, ... r) look like the sample position names of the H.264 fractional sample interpolation figure - confirm.
165        ;// switch table using motion vector as index (M_SWITCH is defined elsewhere; presumably entry N is taken for index == N)
166        M_SWITCH index, L
167        M_CASE  Case_0
168        M_CASE  Case_1
169        M_CASE  Case_2
170        M_CASE  Case_3
171        M_CASE  Case_4
172        M_CASE  Case_5
173        M_CASE  Case_6
174        M_CASE  Case_7
175        M_CASE  Case_8
176        M_CASE  Case_9
177        M_CASE  Case_a
178        M_CASE  Case_b
179        M_CASE  Case_c
180        M_CASE  Case_d
181        M_CASE  Case_e
182        M_CASE  Case_f
183        M_ENDSWITCH
184
185Case_0
186        ;// Case G: index 0 (dx=0, dy=0)
187        M_PRINTF "Case 0 \n"
188        ;// Only the 4x4 copy helper is called, with the block's original r0-r3
189        BL      armVCM4P10_InterpolateLuma_Copy4x4_unsafe
190        B       Block4x4LoopEnd
191
192Case_1
193        ;// Case a: index 1 (dx=1, dy=0)
194        M_PRINTF "Case 1 \n"
195        ;// Helper chain: HorAlign9x -> HalfHor4x4 -> Average_4x4_Align2
196        SUB     pSrc, pSrc, #2                          ;// start 2 bytes before the block's first pixel
197        MOV     Height, #4
198        BL      armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
199        BL      armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
200        BL      armVCM4P10_Average_4x4_Align2_unsafe
201        B       Block4x4LoopEnd
202Case_2
203        ;// Case b: index 2 (dx=2, dy=0)
204        M_PRINTF "Case 2 \n"
205        ;// Same chain as Case_1 but without an averaging helper
206        SUB     pSrc, pSrc, #2                          ;// start 2 bytes before the block's first pixel
207        MOV     Height, #4
208        BL      armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
209        BL      armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
210        B       Block4x4LoopEnd
211Case_3
212        ;// Case c: index 3 (dx=3, dy=0)
213        M_PRINTF "Case 3 \n"
214        ;// Same chain as Case_1 but finishing with the Align3 averaging helper instead of Align2
215        SUB     pSrc, pSrc, #2                          ;// start 2 bytes before the block's first pixel
216        MOV     Height, #4
217        BL      armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
218        BL      armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
219        BL      armVCM4P10_Average_4x4_Align3_unsafe
220        B       Block4x4LoopEnd
221Case_4
222        ;// Case d: index 4 (dx=0, dy=1)
223        M_PRINTF "Case 4 \n"
224        ;// Helper chain: VerAlign4x -> HalfVer4x4 -> Average_4x4_Align0
225        SUB     pSrc, pSrc, srcStep, LSL #1             ;// start 2 rows above the block
226        MOV     Height, #9
227        BL      armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
228        BL      armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
229        BL      armVCM4P10_Average_4x4_Align0_unsafe
230
231        B       Block4x4LoopEnd
232Case_5
233        ;// Case e: index 5 (dx=1, dy=1)
234        M_PRINTF "Case 5 \n"
235        ;// Two helper chains: the first runs with pDst redirected to pTempBuf, the second with the original arguments; pTempBuf is then passed as pSrc to the Align0 averaging helper
236        SUB     pSrc, pSrc, #2                          ;// start 2 bytes before the block's first pixel
237        MOV     Height, #4
238        M_ADR   pDst, pTempBuf                          ;// redirect pDst to pTempBuf ...
239        MOV     dstStep, #4                             ;// ... with a row step of 4 bytes
240        BL      armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
241        BL      armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
242        M_ADR   pArgs, ppArgs
243        LDM     pArgs, {pSrc, srcStep, pDst, dstStep}   ;// restore the block's original r0-r3
244        SUB     pSrc, pSrc, srcStep, LSL #1             ;// start 2 rows above the block
245        M_ADR   pBuf, pBuffer
246        MOV     Height, #9
247        BL      armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
248        BL      armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
249        M_ADR   pSrc, pTempBuf                          ;// pSrc/srcStep now describe pTempBuf ...
250        MOV     srcStep, #4                             ;// ... (row step 4) for the averaging helper
251        BL      armVCM4P10_Average_4x4_Align0_unsafe
252
253
254        B       Block4x4LoopEnd
255Case_6
256        ;// Case f: index 6 (dx=2, dy=1)
257        M_PRINTF "Case 6 \n"
258        ;// Helper chain: HorAlign9x -> HalfDiagHorVer4x4 (pBuf = pInterBuf) -> VerDiagCopy (r7 = pTempBuf) -> Average_4x4_Align0
259        SUB     pSrc, pSrc, #2                          ;// start 2 bytes to the left ...
260        SUB     pSrc, pSrc, srcStep, LSL #1             ;// ... and 2 rows above the block
261        MOV     Height, #9
262        BL      armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
263        M_ADR   pBuf, pInterBuf
264        BL      armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
265        M_ADR   idy, pTempBuf                           ;// r7 (no longer needed as dy) = address of pTempBuf
266        BL      armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe
267        BL      armVCM4P10_Average_4x4_Align0_unsafe
268        B       Block4x4LoopEnd
269Case_7
270        ;// Case g: index 7 (dx=3, dy=1)
271        M_PRINTF "Case 7 \n"
272        ;// Same as Case_5 except that the second chain starts one byte further right (ADD pSrc, #1)
273        SUB     pSrc, pSrc, #2                          ;// start 2 bytes before the block's first pixel
274        MOV     Height, #4
275        M_ADR   pDst, pTempBuf                          ;// redirect pDst to pTempBuf ...
276        MOV     dstStep, #4                             ;// ... with a row step of 4 bytes
277        BL      armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
278        BL      armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
279        M_ADR   pArgs, ppArgs
280        LDM     pArgs, {pSrc, srcStep, pDst, dstStep}   ;// restore the block's original r0-r3
281        SUB     pSrc, pSrc, srcStep, LSL #1             ;// start 2 rows above the block ...
282        ADD     pSrc, pSrc, #1                          ;// ... and one byte to the right
283        M_ADR   pBuf, pBuffer
284        MOV     Height, #9
285        BL      armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
286        BL      armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
287        M_ADR   pSrc, pTempBuf                          ;// pSrc/srcStep now describe pTempBuf ...
288        MOV     srcStep, #4                             ;// ... (row step 4) for the averaging helper
289        BL      armVCM4P10_Average_4x4_Align0_unsafe
290
291        B       Block4x4LoopEnd
292Case_8
293        ;// Case h: index 8 (dx=0, dy=2)
294        M_PRINTF "Case 8 \n"
295        ;// Same chain as Case_4 but without an averaging helper
296        SUB     pSrc, pSrc, srcStep, LSL #1             ;// start 2 rows above the block
297        MOV     Height, #9
298        BL      armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
299        BL      armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
300        B       Block4x4LoopEnd
301Case_9
302        ;// Case i: index 9 (dx=1, dy=2)
303        M_PRINTF "Case 9 \n"
304        ;// Helper chain: HorAlign9x -> HalfDiagVerHor4x4 (pBuf = pInterBuf) -> HorDiagCopy (r7 = pTempBuf) -> Average_4x4_Align2
305        SUB     pSrc, pSrc, #2                          ;// start 2 bytes to the left ...
306        SUB     pSrc, pSrc, srcStep, LSL #1             ;// ... and 2 rows above the block
307        MOV     Height, #9
308        BL      armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
309        ADD     pSrc, pSrc, srcStep, LSL #1             ;// advance pSrc by 2 rows before the diagonal helper
310        M_ADR   pBuf, pInterBuf
311        BL      armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
312        M_ADR   idy, pTempBuf                           ;// r7 (no longer needed as dy) = address of pTempBuf
313        BL      armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe
314        BL      armVCM4P10_Average_4x4_Align2_unsafe
315        B       Block4x4LoopEnd
316Case_a
317        ;// Case j: index 10 (dx=2, dy=2)
318        M_PRINTF "Case a \n"
319        ;// Same start as Case_9 but stops after HalfDiagVerHor4x4: no DiagCopy and no averaging helper
320        SUB     pSrc, pSrc, #2                          ;// start 2 bytes to the left ...
321        SUB     pSrc, pSrc, srcStep, LSL #1             ;// ... and 2 rows above the block
322        MOV     Height, #9
323        BL      armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
324        ADD     pSrc, pSrc, srcStep, LSL #1             ;// advance pSrc by 2 rows before the diagonal helper
325        M_ADR   pBuf, pInterBuf
326        BL      armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
327        B       Block4x4LoopEnd
328Case_b
329        ;// Case k: index 11 (dx=3, dy=2); same as Case_9 but finishing with the Align3 averaging helper
330        M_PRINTF "Case b \n"
331        SUB     pSrc, pSrc, #2                          ;// start 2 bytes to the left ...
332        SUB     pSrc, pSrc, srcStep, LSL #1             ;// ... and 2 rows above the block
333        MOV     Height, #9
334        BL      armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
335        ADD     pSrc, pSrc, srcStep, LSL #1             ;// advance pSrc by 2 rows before the diagonal helper
336        M_ADR   pBuf, pInterBuf
337        BL      armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
338        M_ADR   idy, pTempBuf                           ;// r7 (no longer needed as dy) = address of pTempBuf
339        BL      armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe
340        BL      armVCM4P10_Average_4x4_Align3_unsafe
341        B       Block4x4LoopEnd
342Case_c
343        ;// Case n: index 12 (dx=0, dy=3)
344        M_PRINTF "Case c \n"
345        ;// Same chain as Case_4 except that pSrc is advanced by one row before the averaging helper
346        SUB     pSrc, pSrc, srcStep, LSL #1             ;// start 2 rows above the block
347        MOV     Height, #9
348        BL      armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
349        BL      armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
350        ADD     pSrc, pSrc, srcStep                     ;// Update pSrc to one row down
351        BL      armVCM4P10_Average_4x4_Align0_unsafe
352        B       Block4x4LoopEnd
353Case_d
354        ;// Case p: index 13 (dx=1, dy=3); same as Case_5 except that the first chain starts one row lower
355        M_PRINTF "Case d \n"
356        SUB     pSrc, pSrc, #2                          ;// start 2 bytes to the left ...
357        ADD     pSrc, pSrc, srcStep                     ;// ... and one row down
358        MOV     Height, #4
359        M_ADR   pDst, pTempBuf                          ;// redirect pDst to pTempBuf ...
360        MOV     dstStep, #4                             ;// ... with a row step of 4 bytes
361        BL      armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
362        BL      armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
363        M_ADR   pArgs, ppArgs
364        LDM     pArgs, {pSrc, srcStep, pDst, dstStep}   ;// restore the block's original r0-r3
365        SUB     pSrc, pSrc, srcStep, LSL #1             ;// start 2 rows above the block
366        M_ADR   pBuf, pBuffer
367        MOV     Height, #9
368        BL      armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
369        BL      armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
370        M_ADR   pSrc, pTempBuf                          ;// pSrc/srcStep now describe pTempBuf ...
371        MOV     srcStep, #4                             ;// ... (row step 4) for the averaging helper
372        BL      armVCM4P10_Average_4x4_Align0_unsafe
373        B       Block4x4LoopEnd
374Case_e
375        ;// Case q: index 14 (dx=2, dy=3)
376        M_PRINTF "Case e \n"
377        ;// Same chain as Case_6 except that pSrc is advanced by 4 bytes before the averaging helper
378        SUB     pSrc, pSrc, #2                          ;// start 2 bytes to the left ...
379        SUB     pSrc, pSrc, srcStep, LSL #1             ;// ... and 2 rows above the block
380        MOV     Height, #9
381        BL      armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
382        M_ADR   pBuf, pInterBuf
383        BL      armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
384        M_ADR   idy, pTempBuf                           ;// r7 (no longer needed as dy) = address of pTempBuf
385        BL      armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe
386        ADD     pSrc, pSrc, #4                          ;// NOTE(review): what pSrc addresses here depends on VerDiagCopy, which is not shown - confirm
387        BL      armVCM4P10_Average_4x4_Align0_unsafe
388
389        B       Block4x4LoopEnd
390Case_f
391        ;// Case r: index 15 (dx=3, dy=3); combines the row offset of Case_d with the byte offset of Case_7
392        M_PRINTF "Case f \n"
393        SUB     pSrc, pSrc, #2                          ;// start 2 bytes to the left ...
394        ADD     pSrc, pSrc, srcStep                     ;// ... and one row down
395        MOV     Height, #4
396        M_ADR   pDst, pTempBuf                          ;// redirect pDst to pTempBuf ...
397        MOV     dstStep, #4                             ;// ... with a row step of 4 bytes
398        BL      armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
399        BL      armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
400        M_ADR   pArgs, ppArgs
401        LDM     pArgs, {pSrc, srcStep, pDst, dstStep}   ;// restore the block's original r0-r3
402        SUB     pSrc, pSrc, srcStep, LSL #1             ;// start 2 rows above the block ...
403        ADD     pSrc, pSrc, #1                          ;// ... and one byte to the right
404        M_ADR   pBuf, pBuffer
405        MOV     Height, #9
406        BL      armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
407        BL      armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
408        M_ADR   pSrc, pTempBuf                          ;// pSrc/srcStep now describe pTempBuf ...
409        MOV     srcStep, #4                             ;// ... (row step 4) for the averaging helper
410        BL      armVCM4P10_Average_4x4_Align0_unsafe
411        ;// Case_f is the last case and falls through into Block4x4LoopEnd (no branch needed)
412Block4x4LoopEnd
413        ;// Common tail of every case: advance to the next 4x4 block, first across the width, then down the height
414        ;// Width Loop
415        SUBS    iWidth, iWidth, #4                      ;// 4 more columns done; the flags set here are tested by BGT below
416        M_ADR   pArgs, ppArgs
417        LDM     pArgs, {pSrc,srcStep,pDst,dstStep}  ;// Load arguments saved at the top of this block
418        ADD     pSrc, pSrc, #4                          ;// next block: 4 bytes to the right in the source ...
419        ADD     pDst, pDst, #4                          ;// ... and in the destination
420        BGT     Block4x4WidthLoop
421
422        ;// Height Loop
423        SUBS    iHeight, iHeight, #4                    ;// 4 more rows done; the flags set here are tested by BGT below
424        M_LDR   iWidth, ptrWidth                        ;// reload the full roi.width for the next band of rows
425        M_ADR   pArgs, ppArgs
426        ADD     pSrc, pSrc, srcStep, LSL #2             ;// move down 4 rows ...
427        ADD     pDst, pDst, dstStep, LSL #2
428        SUB     pSrc, pSrc, iWidth                      ;// ... and back by roi.width, undoing the +4 steps of the width loop
429        SUB     pDst, pDst, iWidth
430        BGT     Block4x4HeightLoop
431
432EndOfInterpolation
433        MOV     r0, #0                                  ;// always return 0 (the file header names the success value OMX_Sts_NoErr)
434        M_END
435
436    ENDIF
437
438
439    END
440
441