1///*****************************************************************************
2//*
3//* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
4//*
5//* Licensed under the Apache License, Version 2.0 (the "License");
6//* you may not use this file except in compliance with the License.
7//* You may obtain a copy of the License at:
8//*
9//* http://www.apache.org/licenses/LICENSE-2.0
10//*
11//* Unless required by applicable law or agreed to in writing, software
12//* distributed under the License is distributed on an "AS IS" BASIS,
13//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14//* See the License for the specific language governing permissions and
15//* limitations under the License.
16//*
17//*****************************************************************************/
18///**
19// *******************************************************************************
20// * ,:file
21// *  ihevc_mem_fns_neon.s
22// *
23// * ,:brief
24// *  Contains function definitions for memory manipulation
25// *
26// * ,:author
27// *     Naveen SR
28// *
29// * ,:par List of Functions:
// *  - ihevc_memcpy_mul_8_av8()
// *  - ihevc_memcpy_av8()
// *  - ihevc_memset_mul_8_av8()
// *  - ihevc_memset_av8()
// *  - ihevc_memset_16bit_mul_8_av8()
// *  - ihevc_memset_16bit_av8()
33// *
34// * ,:remarks
35// *  None
36// *
37// *******************************************************************************
38//*/
39
40///**
41//*******************************************************************************
42//*
43//* ,:brief
44//*   memcpy of a 1d array
45//*
46//* ,:par Description:
//*   Copies 8-bit data from source to destination when the byte count is 8, 16 or 32
48//*
49//* ,:param[in] pu1_dst
50//*  UWORD8 pointer to the destination
51//*
52//* ,:param[in] pu1_src
53//*  UWORD8 pointer to the source
54//*
55//* ,:param[in] num_bytes
56//*  number of bytes to copy
57//* ,:returns
58//*
59//* ,:remarks
60//*  None
61//*
62//*******************************************************************************
63//*/
64//void ihevc_memcpy_mul_8(UWORD8 *pu1_dst,
65//                      UWORD8 *pu1_src,
66//                      UWORD8 num_bytes)
67//**************Variables Vs Registers*************************
68//    x0 => *pu1_dst
69//    x1 => *pu1_src
70//    x2 => num_bytes
71
.text
.p2align 2

    .global ihevc_memcpy_mul_8_av8
    .type   ihevc_memcpy_mul_8_av8, %function

//------------------------------------------------------------------------------
// ihevc_memcpy_mul_8_av8: copy num_bytes bytes from pu1_src to pu1_dst in
// 8-byte steps.
//
// In:       x0 = pu1_dst, x1 = pu1_src, x2 = num_bytes
// Out:      nothing
// Clobbers: x0, x1, x2, v0, condition flags
//
// The first chunk is copied before the count is ever tested, and the loop only
// stops when the count lands exactly on zero, so num_bytes has to be a
// non-zero multiple of 8.
//------------------------------------------------------------------------------
ihevc_memcpy_mul_8_av8:
.Lmemcpy_mul_8_next:
    ld1     {v0.8b}, [x1], #8           // fetch 8 source bytes, src += 8
    subs    x2, x2, #8                  // bytes left once this chunk is stored
    st1     {v0.8b}, [x0], #8           // write the chunk, dst += 8 (flags untouched)
    b.ne    .Lmemcpy_mul_8_next         // repeat until the count is exactly 0
    ret
89
90
91
92//*******************************************************************************
93//*/
94//void ihevc_memcpy(UWORD8 *pu1_dst,
95//                  UWORD8 *pu1_src,
96//                  UWORD8 num_bytes)
97//**************Variables Vs Registers*************************
98//    x0 => *pu1_dst
99//    x1 => *pu1_src
100//    x2 => num_bytes
101
102
103
104    .global ihevc_memcpy_av8
105.type ihevc_memcpy_av8, %function
106
107ihevc_memcpy_av8:
108    SUBS        x2,x2,#8
109    BLT         ARM_MEMCPY
110LOOP_NEON_MEMCPY:
111    // Memcpy 8 bytes
112    LD1         {v0.8b},[x1],#8
113    ST1         {v0.8b},[x0],#8
114
115    SUBS        x2,x2,#8
116    BGE         LOOP_NEON_MEMCPY
117    CMN         x2,#8
118    BEQ         MEMCPY_RETURN
119
120ARM_MEMCPY:
121    ADD         x2,x2,#8
122
123LOOP_ARM_MEMCPY:
124    LDRB        w3,[x1],#1
125    STRB        w3,[x0],#1
126    SUBS        x2,x2,#1
127    BNE         LOOP_ARM_MEMCPY
128MEMCPY_RETURN:
129    ret
130
131
132
133
134//void ihevc_memset_mul_8(UWORD8 *pu1_dst,
135//                       UWORD8 value,
136//                       UWORD8 num_bytes)
137//**************Variables Vs Registers*************************
138//    x0 => *pu1_dst
139//    x1 => value
140//    x2 => num_bytes
141
.text
.p2align 2

    .global ihevc_memset_mul_8_av8
    .type   ihevc_memset_mul_8_av8, %function

//------------------------------------------------------------------------------
// ihevc_memset_mul_8_av8: fill num_bytes bytes at pu1_dst with value, writing
// 8 bytes per step.
//
// In:       x0 = pu1_dst, w1 = value (low byte is used), x2 = num_bytes
// Out:      nothing
// Clobbers: x0, x2, v0, condition flags
//
// The first store happens before the count is tested and the loop only stops
// when the count lands exactly on zero, so num_bytes has to be a non-zero
// multiple of 8 (the original note lists 8, 16 or 32).
//------------------------------------------------------------------------------
ihevc_memset_mul_8_av8:
    dup     v0.8b, w1                   // splat the fill byte across 8 lanes

.Lmemset_mul_8_next:
    subs    x2, x2, #8                  // bytes left once this chunk is stored
    st1     {v0.8b}, [x0], #8           // store 8 fill bytes, dst += 8 (flags untouched)
    b.ne    .Lmemset_mul_8_next         // repeat until the count is exactly 0
    ret
162
163
164
165
166//void ihevc_memset(UWORD8 *pu1_dst,
167//                       UWORD8 value,
168//                       UWORD8 num_bytes)
169//**************Variables Vs Registers*************************
170//    x0 => *pu1_dst
171//    x1 => value
172//    x2 => num_bytes
173
174
175
176    .global ihevc_memset_av8
177.type ihevc_memset_av8, %function
178
179ihevc_memset_av8:
180    SUBS        x2,x2,#8
181    BLT         ARM_MEMSET
182    dup         v0.8b,w1
183LOOP_NEON_MEMSET:
184    // Memcpy 8 bytes
185    ST1         {v0.8b},[x0],#8
186
187    SUBS        x2,x2,#8
188    BGE         LOOP_NEON_MEMSET
189    CMN         x2,#8
190    BEQ         MEMSET_RETURN
191
192ARM_MEMSET:
193    ADD         x2,x2,#8
194
195LOOP_ARM_MEMSET:
196    STRB        w1,[x0],#1
197    SUBS        x2,x2,#1
198    BNE         LOOP_ARM_MEMSET
199
200MEMSET_RETURN:
201    ret
202
203
204
205
206//void ihevc_memset_16bit_mul_8(UWORD16 *pu2_dst,
207//                                      UWORD16 value,
208//                                      UWORD8 num_words)
209//**************Variables Vs Registers*************************
210//    x0 => *pu2_dst
211//    x1 => value
212//    x2 => num_words
213
.text
.p2align 2

    .global ihevc_memset_16bit_mul_8_av8
    .type   ihevc_memset_16bit_mul_8_av8, %function

//------------------------------------------------------------------------------
// ihevc_memset_16bit_mul_8_av8: fill num_words 16-bit elements at pu2_dst with
// value, writing 8 elements (16 bytes) per step.
//
// In:       x0 = pu2_dst, w1 = value (low 16 bits are used), x2 = num_words
// Out:      nothing
// Clobbers: x0, x2, v0, condition flags
//
// The first store happens before the count is tested and the loop only stops
// when the count lands exactly on zero, so num_words has to be a non-zero
// multiple of 8 (the original note lists 8, 16 or 32).
//------------------------------------------------------------------------------
ihevc_memset_16bit_mul_8_av8:
    dup     v0.8h, w1                   // splat the 16-bit value across 8 lanes

.Lmemset_16bit_mul_8_next:
    subs    x2, x2, #8                  // elements left once this chunk is stored
    st1     {v0.8h}, [x0], #16          // store 8 elements, dst += 16 bytes (flags untouched)
    b.ne    .Lmemset_16bit_mul_8_next   // repeat until the count is exactly 0
    ret
235
236
237
238
239//void ihevc_memset_16bit(UWORD16 *pu2_dst,
240//                       UWORD16 value,
241//                       UWORD8 num_words)
242//**************Variables Vs Registers*************************
243//    x0 => *pu2_dst
244//    x1 => value
245//    x2 => num_words
246
247
248
249    .global ihevc_memset_16bit_av8
250.type ihevc_memset_16bit_av8, %function
251
252ihevc_memset_16bit_av8:
253    SUBS        x2,x2,#8
254    BLT         ARM_MEMSET_16BIT
255    dup         v0.8h,w1
256LOOP_NEON_MEMSET_16BIT:
257    // Memset 8 words
258    ST1         {v0.8h},[x0],#16
259
260    SUBS        x2,x2,#8
261    BGE         LOOP_NEON_MEMSET_16BIT
262    CMN         x2,#8
263    BEQ         MEMSET_16BIT_RETURN
264
265ARM_MEMSET_16BIT:
266    ADD         x2,x2,#8
267
268LOOP_ARM_MEMSET_16BIT:
269    STRH        w1,[x0],#2
270    SUBS        x2,x2,#1
271    BNE         LOOP_ARM_MEMSET_16BIT
272
273MEMSET_16BIT_RETURN:
274    ret
275
276
277
278
279    .section .note.GNU-stack,"",%progbits
280
281