1@/*****************************************************************************
2@*
3@* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
4@*
5@* Licensed under the Apache License, Version 2.0 (the "License");
6@* you may not use this file except in compliance with the License.
7@* You may obtain a copy of the License at:
8@*
9@* http://www.apache.org/licenses/LICENSE-2.0
10@*
11@* Unless required by applicable law or agreed to in writing, software
12@* distributed under the License is distributed on an "AS IS" BASIS,
13@* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14@* See the License for the specific language governing permissions and
15@* limitations under the License.
16@*
17@*****************************************************************************/
18@/**
19@/*******************************************************************************
20@* @file
21@*  ihevcd_fmt_conv_420sp_to_420sp.s
22@*
23@* @brief
24@*  contains function definitions for format conversions
25@*
26@* @author
27@*  ittiam
28@*
29@* @par list of functions:
30@*
31@*
32@* @remarks
33@*  none
34@*
35@*******************************************************************************/
36    .equ DO1STROUNDING, 0
37
38    @ ARM
39    @
40    @ PRESERVE8
41
42.text
43.p2align 2
44
45
46
47
48
@/*****************************************************************************
@*
@*  Function Name    : ihevcd_fmt_conv_420sp_to_420sp_a9q()
@*
@*  Description      : Copies a YUV 4:2:0 semi-planar image (a Y plane plus an
@*                     interleaved UV plane) to a destination image with the
@*                     same layout. Source and destination may use different
@*                     row strides.
@*
@*  Arguments        : r0           pu1_y
@*                     r1           pu1_uv
@*                     r2           pu1_dest_y
@*                     r3           pu1_dest_uv
@*                     [sp, #40]    u2_width
@*                     [sp, #44]    u2_height
@*                     [sp, #48]    u2_stridey
@*                     [sp, #52]    u2_stridechroma
@*                     [sp, #56]    u2_dest_stridey
@*                     [sp, #60]    u2_dest_stridechroma
@*                     (stack offsets are relative to sp AFTER the 40-byte
@*                      register save performed on entry)
@*
@*  Values Returned  : None
@*
@*  Register Usage   : r0/r1  running source pointers (Y / UV)
@*                     r2     running destination pointer
@*                     r3     destination UV base (moved to r2 for pass 2)
@*                     r4     rows left in the current plane
@*                     r5/r7  destination / source stride of current plane
@*                     r6     bytes left in the current row
@*                     r8/r9  width / height (r9 becomes height/2 for UV)
@*                     r10    source stride - width  (gap to next row)
@*                     r11    dest   stride - width  (gap to next row)
@*                     r12    scratch byte for narrow rows
@*                     d0-d3  NEON copy buffer (caller-saved, not preserved)
@*
@*  Stack Usage      : 40 bytes (r4-r12, lr)
@*
@*  Interruptibility : Interruptible
@*
@*  Behaviour notes  :
@*    - Each UV row is 'width' bytes long (U and V interleaved), and
@*      height/2 UV rows are copied.
@*    - Rows are copied in 32-byte (Y) or 16-byte (UV) NEON chunks. When the
@*      width is not a multiple of the chunk size, the last chunk is
@*      re-positioned so that it ENDS exactly at the end of the row; it
@*      therefore re-copies a few bytes that were already copied.
@*    - Rows narrower than one chunk cannot use that trick (the chunk would
@*      start before the row), so they are copied byte by byte.
@*    - Nothing is copied if width <= 0 or height <= 0; the UV plane is
@*      skipped if height/2 == 0.
@*
@*  Assumptions      : Image width is a multiple of 2, image height is even.
@*
@*  Revision History :
@*         DD MM YYYY   Author(s)       Changes (Describe the changes made)
@*         16 05 2012   Naveen SR     draft
@*
@*****************************************************************************/

    .global ihevcd_fmt_conv_420sp_to_420sp_a9q
    .type   ihevcd_fmt_conv_420sp_to_420sp_a9q, %function
ihevcd_fmt_conv_420sp_to_420sp_a9q:

    STMFD       sp!, {r4-r12, lr}           @ 10 registers = 40 bytes

    LDR         r8, [sp, #40]               @ r8 = u2_width
    LDR         r9, [sp, #44]               @ r9 = u2_height
    LDR         r7, [sp, #48]               @ r7 = u2_stridey
    LDR         r5, [sp, #56]               @ r5 = u2_dest_stridey

    @ The copy loops below test their counters only AFTER doing work, so an
    @ empty image must be rejected up front.
    CMP         r8, #1
    BLT         .Lfmt_conv_420sp_exit       @ width  <= 0 : nothing to copy
    CMP         r9, #1
    BLT         .Lfmt_conv_420sp_exit       @ height <= 0 : nothing to copy

    @ ------------------------------------------------------------------
    @ Pass 1 : Y plane
    @ ------------------------------------------------------------------
    SUB         r10, r7, r8                 @ source gap from row end to next row
    SUB         r11, r5, r8                 @ dest   gap from row end to next row
    MOV         r4, r9                      @ rows left = height

.Lfmt_conv_420sp_y_row:
    MOV         r6, r8                      @ bytes left in this row = width
    CMP         r6, #32
    BLT         .Lfmt_conv_420sp_y_narrow   @ row shorter than one 32-byte chunk

.Lfmt_conv_420sp_y_chunk:
    PLD         [r0, #128]
    SUB         r6, r6, #32
    VLD1.8      {d0-d3}, [r0]!              @ load 32 bytes, advance source
    VST1.8      {d0-d3}, [r2]!              @ store 32 bytes, advance dest
    CMP         r6, #32
    BGE         .Lfmt_conv_420sp_y_chunk    @ another full chunk fits
    CMP         r6, #0
    BEQ         .Lfmt_conv_420sp_y_row_end  @ width was a multiple of 32

    @ 0 < r6 < 32 bytes remain. Step both pointers back by (32 - r6) so one
    @ more 32-byte chunk ends exactly at the end of the row.
    @ Example: width 162 -> the loop above copied 160 bytes, 2 remain; the
    @ pointers move back 30 bytes to offset 130 and bytes 130..161 are copied.
    RSB         r6, r6, #32
    SUB         r0, r0, r6
    SUB         r2, r2, r6
    VLD1.8      {d0-d3}, [r0]!
    VST1.8      {d0-d3}, [r2]!

.Lfmt_conv_420sp_y_row_end:
    ADD         r0, r0, r10                 @ start of next source row
    ADD         r2, r2, r11                 @ start of next dest row
    SUBS        r4, r4, #1
    BGT         .Lfmt_conv_420sp_y_row

    @ ------------------------------------------------------------------
    @ Pass 2 : interleaved UV plane (height/2 rows of 'width' bytes)
    @ ------------------------------------------------------------------
    MOV         r9, r9, LSR #1              @ r9 = height / 2
    CMP         r9, #0
    BEQ         .Lfmt_conv_420sp_exit       @ no chroma rows to copy

    LDR         r5, [sp, #60]               @ r5 = u2_dest_stridechroma
    LDR         r7, [sp, #52]               @ r7 = u2_stridechroma

    MOV         r2, r3                      @ destination pointer = pu1_dest_uv

    SUB         r10, r7, r8                 @ source gap from row end to next row
    SUB         r11, r5, r8                 @ dest   gap from row end to next row
    MOV         r4, r9                      @ rows left = height / 2

.Lfmt_conv_420sp_uv_row:
    MOV         r6, r8                      @ bytes left in this row = width
    CMP         r6, #16
    BLT         .Lfmt_conv_420sp_uv_narrow  @ row shorter than one 16-byte chunk

.Lfmt_conv_420sp_uv_chunk:
    PLD         [r1, #128]
    SUB         r6, r6, #16
    VLD1.8      {d0-d1}, [r1]!              @ load 16 bytes, advance source
    VST1.8      {d0-d1}, [r2]!              @ store 16 bytes, advance dest
    CMP         r6, #16
    BGE         .Lfmt_conv_420sp_uv_chunk   @ another full chunk fits
    CMP         r6, #0
    BEQ         .Lfmt_conv_420sp_uv_row_end @ width was a multiple of 16

    @ 0 < r6 < 16 bytes remain. Step both pointers back by (16 - r6) so one
    @ more 16-byte chunk ends exactly at the end of the row.
    @ Example: width 162 -> the loop above copied 160 bytes, 2 remain; the
    @ pointers move back 14 bytes to offset 146 and bytes 146..161 are copied.
    RSB         r6, r6, #16
    SUB         r1, r1, r6
    SUB         r2, r2, r6
    VLD1.8      {d0-d1}, [r1]!
    VST1.8      {d0-d1}, [r2]!

.Lfmt_conv_420sp_uv_row_end:
    ADD         r1, r1, r10                 @ start of next source row
    ADD         r2, r2, r11                 @ start of next dest row
    SUBS        r4, r4, #1
    BGT         .Lfmt_conv_420sp_uv_row

.Lfmt_conv_420sp_exit:
    LDMFD       sp!, {r4-r12, pc}           @ restore registers and return

    @ ------------------------------------------------------------------
    @ Out-of-line slow paths for rows narrower than one NEON chunk.
    @ r6 holds the row width (>= 1 here because width <= 0 exits early),
    @ so the post-tested byte loops below are safe.
    @ ------------------------------------------------------------------
.Lfmt_conv_420sp_y_narrow:
    LDRB        r12, [r0], #1
    SUBS        r6, r6, #1                  @ STRB below leaves the flags intact
    STRB        r12, [r2], #1
    BGT         .Lfmt_conv_420sp_y_narrow
    B           .Lfmt_conv_420sp_y_row_end

.Lfmt_conv_420sp_uv_narrow:
    LDRB        r12, [r1], #1
    SUBS        r6, r6, #1                  @ STRB below leaves the flags intact
    STRB        r12, [r2], #1
    BGT         .Lfmt_conv_420sp_uv_narrow
    B           .Lfmt_conv_420sp_uv_row_end

    .size ihevcd_fmt_conv_420sp_to_420sp_a9q, . - ihevcd_fmt_conv_420sp_to_420sp_a9q


    @ Empty .note.GNU-stack section: conventionally tells the GNU linker that
    @ this object does not need an executable stack.
    .section .note.GNU-stack,"",%progbits
198
199