1@/******************************************************************************
2@ *
3@ * Copyright (C) 2015 The Android Open Source Project
4@ *
5@ * Licensed under the Apache License, Version 2.0 (the "License");
6@ * you may not use this file except in compliance with the License.
7@ * You may obtain a copy of the License at:
8@ *
9@ * http://www.apache.org/licenses/LICENSE-2.0
10@ *
11@ * Unless required by applicable law or agreed to in writing, software
12@ * distributed under the License is distributed on an "AS IS" BASIS,
13@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14@ * See the License for the specific language governing permissions and
15@ * limitations under the License.
16@ *
17@ *****************************************************************************
18@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19@*/
20@**
21@ *******************************************************************************
22@ * @file
23@ *  ih264_mem_fns_neon.s
24@ *
25@ * @brief
26@ *  Contains function definitions for memory manipulation
27@ *
28@ * @author
29@ *  Naveen SR
30@ *
31@ * @par List of Functions:
32@ *  - ih264_memcpy_mul_8_a9q()
33@ *  - ih264_memcpy_a9q()
34@ *  - ih264_memset_mul_8_a9q()
35@ *  - ih264_memset_a9q()
36@ *  - ih264_memset_16bit_mul_8_a9q()
@ *  - ih264_memset_16bit_a9q()
38@ *
39@ * @remarks
40@ *  None
41@ *
42@ *******************************************************************************
43@*
44
45@**
46@*******************************************************************************
47@*
48@* @brief
49@*   memcpy of a 1d array
50@*
51@* @par Description:
@*   Copies 8-bit data from source to destination; num_bytes is expected to be 8, 16 or 32
53@*
@* @param[out] pu1_dst
55@*  UWORD8 pointer to the destination
56@*
57@* @param[in] pu1_src
58@*  UWORD8 pointer to the source
59@*
60@* @param[in] num_bytes
61@*  number of bytes to copy
62@* @returns
63@*
64@* @remarks
65@*  None
66@*
67@*******************************************************************************
68@*
@ void ih264_memcpy_mul_8(UWORD8 *pu1_dst,
@                         UWORD8 *pu1_src,
@                         UWORD8 num_bytes)
@
@ Copies num_bytes bytes from pu1_src to pu1_dst in 8-byte chunks.
@ The loop only terminates when the running count reaches exactly zero,
@ so num_bytes must be a non-zero multiple of 8.
@
@ Register usage:
@   r0 - pu1_dst   (advanced by 8 after every chunk)
@   r1 - pu1_src   (advanced by 8 after every chunk)
@   r2 - num_bytes (counts down by 8)
@   d0 - scratch, holds the chunk in flight

.text
.p2align 2


    .global ih264_memcpy_mul_8_a9q

ih264_memcpy_mul_8_a9q:

.Lmemcpy_mul_8_chunk:
    vld1.8        {d0}, [r1]!               @ d0 = next 8 source bytes, r1 += 8
    subs          r2, #8                    @ count down; the NEON store leaves the flags alone
    vst1.8        {d0}, [r0]!               @ write the 8 bytes, r0 += 8
    bne           .Lmemcpy_mul_8_chunk      @ keep going until the count is zero
    bx            lr
93
94
95
@*******************************************************************************
@*
@void ih264_memcpy(UWORD8 *pu1_dst,
@                  UWORD8 *pu1_src,
@                  UWORD8 num_bytes)
@
@ Copies num_bytes bytes from pu1_src to pu1_dst. While at least 8 bytes
@ remain they are moved 8 at a time through d0; the remaining 0..7 bytes are
@ moved one at a time through r3. When num_bytes is 0 the function returns
@ without reading or writing any memory.
@
@**************Variables Vs Registers*************************
@   r0 => *pu1_dst   (post-incremented as bytes are written)
@   r1 => *pu1_src   (post-incremented as bytes are read)
@   r2 => num_bytes  (reused as the remaining-byte counter)
@   r3, d0 => scratch



    .global ih264_memcpy_a9q

ih264_memcpy_a9q:
    subs          r2, r2, #8                @ r2 = num_bytes - 8
    blt           .Lmemcpy_tail             @ fewer than 8 bytes: skip the NEON loop

.Lmemcpy_neon_loop:
    @ Copy one 8-byte chunk
    vld1.8        {d0}, [r1]!
    vst1.8        {d0}, [r0]!

    subs          r2, r2, #8
    bge           .Lmemcpy_neon_loop        @ another full chunk is still available

.Lmemcpy_tail:
    @ r2 is in [-8, -1] here; adding 8 back yields the 0..7 leftover bytes.
    adds          r2, r2, #8
    bxeq          lr                        @ nothing left over (this also covers num_bytes == 0)

.Lmemcpy_byte_loop:
    ldrb          r3, [r1], #1
    strb          r3, [r0], #1
    subs          r2, r2, #1
    bne           .Lmemcpy_byte_loop
    bx            lr
132
133
134
135
@ void ih264_memset_mul_8(UWORD8 *pu1_dst,
@                         UWORD8 value,
@                         UWORD8 num_bytes)
@
@ Fills num_bytes bytes at pu1_dst with the low byte of value, 8 bytes per
@ store. The loop exits only when the count reaches exactly zero.
@
@ Register usage:
@   r0 - pu1_dst   (advanced by 8 after every store)
@   r1 - value     (low byte is replicated into d0)
@   r2 - num_bytes (counts down by 8)
@   d0 - eight copies of the fill byte





    .global ih264_memset_mul_8_a9q

ih264_memset_mul_8_a9q:

@ Assumptions: num_bytes is either 8, 16 or 32
    vdup.8        d0, r1                    @ d0 = fill byte in all 8 lanes

.Lmemset_mul_8_chunk:
    subs          r2, #8                    @ count down; the NEON store leaves the flags alone
    vst1.8        {d0}, [r0]!               @ write 8 fill bytes, r0 += 8
    bne           .Lmemset_mul_8_chunk      @ keep going until the count is zero

    bx            lr
162
163
164
165
@void ih264_memset(UWORD8 *pu1_dst,
@                       UWORD8 value,
@                       UWORD8 num_bytes)
@
@ Fills num_bytes bytes at pu1_dst with the low byte of value. While at least
@ 8 bytes remain they are written 8 at a time from d0; the remaining 0..7
@ bytes are written one at a time from r1. When num_bytes is 0 the function
@ returns without writing any memory.
@
@**************Variables Vs Registers*************************
@   r0 => *pu1_dst   (post-incremented as bytes are written)
@   r1 => value      (only the low byte is stored)
@   r2 => num_bytes  (reused as the remaining-byte counter)
@   d0 => scratch, eight copies of the fill byte



    .global ih264_memset_a9q

ih264_memset_a9q:
    subs          r2, r2, #8                @ r2 = num_bytes - 8
    blt           .Lmemset_tail             @ fewer than 8 bytes: skip the NEON loop
    vdup.8        d0, r1                    @ d0 = fill byte in all 8 lanes

.Lmemset_neon_loop:
    @ Memset 8 bytes
    vst1.8        {d0}, [r0]!

    subs          r2, r2, #8
    bge           .Lmemset_neon_loop        @ another full chunk is still wanted

.Lmemset_tail:
    @ r2 is in [-8, -1] here; adding 8 back yields the 0..7 leftover bytes.
    adds          r2, r2, #8
    bxeq          lr                        @ nothing left over (this also covers num_bytes == 0)

.Lmemset_byte_loop:
    strb          r1, [r0], #1
    subs          r2, r2, #1
    bne           .Lmemset_byte_loop
    bx            lr
199
200
201
202
@ void ih264_memset_16bit_mul_8(UWORD16 *pu2_dst,
@                               UWORD16 value,
@                               UWORD8 num_words)
@
@ Fills num_words 16-bit elements at pu2_dst with the low halfword of value.
@ Each pass writes 8 elements as two 8-byte stores; the loop exits only when
@ the count reaches exactly zero.
@
@ Register usage:
@   r0 - pu2_dst   (advanced by 16 bytes per pass)
@   r1 - value     (low halfword is replicated into d0)
@   r2 - num_words (counts down by 8)
@   d0 - four copies of the fill halfword





    .global ih264_memset_16bit_mul_8_a9q

ih264_memset_16bit_mul_8_a9q:

@ Assumptions: num_words is either 8, 16 or 32

    vdup.16       d0, r1                            @ d0 = fill halfword in all 4 lanes

.Lmemset_16bit_mul_8_chunk:
    @ Memset 8 words: two stores of 4 halfwords each
    vst1.16       {d0}, [r0]!
    subs          r2, #8                            @ count down; the NEON store leaves the flags alone
    vst1.16       {d0}, [r0]!
    bne           .Lmemset_16bit_mul_8_chunk        @ keep going until the count is zero

    bx            lr
231
232
233
234
@void ih264_memset_16bit(UWORD16 *pu2_dst,
@                       UWORD16 value,
@                       UWORD8 num_words)
@
@ Fills num_words 16-bit elements at pu2_dst with the low halfword of value.
@ While at least 8 elements remain they are written 8 at a time (two 8-byte
@ stores from d0); the remaining 0..7 elements are written one at a time from
@ r1. When num_words is 0 the function returns without writing any memory.
@
@**************Variables Vs Registers*************************
@   r0 => *pu2_dst   (post-incremented as elements are written)
@   r1 => value      (only the low halfword is stored)
@   r2 => num_words  (reused as the remaining-element counter)
@   d0 => scratch, four copies of the fill halfword



    .global ih264_memset_16bit_a9q

ih264_memset_16bit_a9q:
    subs          r2, r2, #8                    @ r2 = num_words - 8
    blt           .Lmemset_16bit_tail           @ fewer than 8 elements: skip the NEON loop
    vdup.16       d0, r1                        @ d0 = fill halfword in all 4 lanes

.Lmemset_16bit_neon_loop:
    @ Memset 8 words: two stores of 4 halfwords each
    vst1.16       {d0}, [r0]!
    vst1.16       {d0}, [r0]!

    subs          r2, r2, #8
    bge           .Lmemset_16bit_neon_loop      @ another full group of 8 is still wanted

.Lmemset_16bit_tail:
    @ r2 is in [-8, -1] here; adding 8 back yields the 0..7 leftover elements.
    adds          r2, r2, #8
    bxeq          lr                            @ nothing left over (this also covers num_words == 0)

.Lmemset_16bit_word_loop:
    strh          r1, [r0], #2
    subs          r2, r2, #1
    bne           .Lmemset_16bit_word_loop
    bx            lr
269
270
271
272
273