1/*
2 * Copyright (c) 2017 Imagination Technologies.
3 *
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 *      * Redistributions of source code must retain the above copyright
11 *        notice, this list of conditions and the following disclaimer.
12 *      * Redistributions in binary form must reproduce the above copyright
13 *        notice, this list of conditions and the following disclaimer
14 *        in the documentation and/or other materials provided with
15 *        the distribution.
16 *      * Neither the name of Imagination Technologies nor the names of its
17 *        contributors may be used to endorse or promote products derived
18 *        from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33#include <string.h>
34
#ifndef UNALIGNED_INSTR_SUPPORT
/* Does the target have unaligned lw/ld/ualw/uald instructions?
   Set to 1 when __mips_isa_rev is below 6 and __mips1 is not set,
   otherwise to 0.  A definition supplied before this point wins.  */
#if __mips_isa_rev < 6 && !__mips1
#define UNALIGNED_INSTR_SUPPORT 1
#else
#define UNALIGNED_INSTR_SUPPORT 0
#endif
#endif
43
#ifndef HW_UNALIGNED_SUPPORT
/* Does the target have hardware support for unaligned accesses?
   Set to 1 when __mips_isa_rev is 6 or higher, otherwise to 0.
   A definition supplied before this point wins.  */
#if __mips_isa_rev >= 6
#define HW_UNALIGNED_SUPPORT 1
#else
#define HW_UNALIGNED_SUPPORT 0
#endif
#endif
52
/* Set to 0 to compile the prefetch hints out.  */
#define ENABLE_PREFETCH     1

/* PREFETCH (addr) expands to a single expression in both configurations.
   The caller writes the terminating semicolon, which keeps the macro
   usable as the only statement of an if/else branch.  */
#if ENABLE_PREFETCH
/* Second argument 0 requests a prefetch for reading; third argument 1
   asks for a low degree of temporal locality.  */
#define PREFETCH(addr)  __builtin_prefetch ((addr), 0, 1)
#else
#define PREFETCH(addr)  ((void) 0)
#endif
60
/* reg_t is the unit moved by the word loops: unsigned long when
   _MIPS_SIM is _ABIO32, unsigned long long otherwise.
   bits_t overlays 8-bit fields B0, B1, ... on one reg_t so that a value
   loaded as a whole word can be stored back one byte at a time.  */
#if _MIPS_SIM == _ABIO32
typedef unsigned long reg_t;
typedef struct
{
  reg_t B0:8;
  reg_t B1:8;
  reg_t B2:8;
  reg_t B3:8;
} bits_t;
#else
typedef unsigned long long reg_t;
typedef struct
{
  reg_t B0:8;
  reg_t B1:8;
  reg_t B2:8;
  reg_t B3:8;
  reg_t B4:8;
  reg_t B5:8;
  reg_t B6:8;
  reg_t B7:8;
} bits_t;
#endif

/* The same storage viewed either as one whole word (v) or as its
   individual byte fields (b).  */
typedef union
{
  reg_t v;
  bits_t b;
} bitfields_t;
80
/* Store byte field B<i> of `bw' into a[i], decrement `len', and return
   `ret' from the ENCLOSING function once `len' reaches zero.
   `bw' (a bitfields_t), `len' and `ret' are not parameters: they must be
   in scope at the point of use.  The do/while (0) wrapper makes the
   expansion a single statement; the caller supplies the semicolon.  */
#define DO_BYTE(a, i)       \
  do {                      \
    (a)[i] = bw.b.B##i;     \
    len--;                  \
    if (!len)               \
      return ret;           \
  } while (0)

/* Copy LEN bytes from B to A one byte at a time and return RET.
   memcpy uses this for copies too short for the word loops and for the
   leading bytes that bring the source pointer up to reg_t alignment.  */
static inline void * __attribute__ ((always_inline))
do_bytes (void *a, const void *b, unsigned long len, void *ret)
{
  unsigned char *dst = (unsigned char *) a;
  const unsigned char *src = (const unsigned char *) b;

  /* LEN may be zero, so nothing is read from SRC ahead of the loop
     test: a preload could touch unallocated memory.  */
  while (len != 0) {
    *dst++ = *src++;
    len--;
  }
  return ret;
}
105
106/* This code is called to copy only remaining bytes within word or doubleword */
107static inline void * __attribute__ ((always_inline))
108do_bytes_remaining (void *a, const void *b, unsigned long len, void *ret)
109{
110  unsigned char *x = (unsigned char *) a;
111
112  if(len > 0) {
113    bitfields_t bw;
114    bw.v = *((reg_t*) b);
115
116#if __mips64
117    DO_BYTE(x, 0);
118    DO_BYTE(x, 1);
119    DO_BYTE(x, 2);
120    DO_BYTE(x, 3);
121    DO_BYTE(x, 4);
122    DO_BYTE(x, 5);
123    DO_BYTE(x, 6);
124    DO_BYTE(x, 7);
125#else
126    DO_BYTE(x, 0);
127    DO_BYTE(x, 1);
128    DO_BYTE(x, 2);
129    DO_BYTE(x, 3);
130#endif
131  }
132
133    return ret;
134}
135
136#if !HW_UNALIGNED_SUPPORT
137#if UNALIGNED_INSTR_SUPPORT
138/* for MIPS GCC, there are no unaligned builtins - so this struct forces
139   the compiler to treat the pointer access as unaligned.  */
140struct ulw
141{
142  reg_t uli;
143} __attribute__ ((packed));
144
145/* first pointer is not aligned while second pointer is.  */
146static void *
147unaligned_words (struct ulw *a, const reg_t * b,
148                 unsigned long words, unsigned long bytes, void *ret)
149{
150#if ((_MIPS_SIM == _ABIO32) || _MIPS_TUNE_I6400)
151  unsigned long i, words_by_8, words_by_1;
152  words_by_1 = words % 8;
153  words_by_8 = words >> 3;
154  for (; words_by_8 > 0; words_by_8--) {
155    if(words_by_8 != 1)
156      PREFETCH (b + 8);
157    reg_t y0 = b[0], y1 = b[1], y2 = b[2], y3 = b[3];
158    reg_t y4 = b[4], y5 = b[5], y6 = b[6], y7 = b[7];
159    a[0].uli = y0;
160    a[1].uli = y1;
161    a[2].uli = y2;
162    a[3].uli = y3;
163    a[4].uli = y4;
164    a[5].uli = y5;
165    a[6].uli = y6;
166    a[7].uli = y7;
167    a += 8;
168    b += 8;
169  }
170#else
171  unsigned long i, words_by_4, words_by_1;
172  words_by_1 = words % 4;
173  words_by_4 = words >> 2;
174   for (; words_by_4 > 0; words_by_4--) {
175    if(words_by_4 != 1)
176      PREFETCH (b + 4);
177    reg_t y0 = b[0], y1 = b[1], y2 = b[2], y3 = b[3];
178    a[0].uli = y0;
179    a[1].uli = y1;
180    a[2].uli = y2;
181    a[3].uli = y3;
182    a += 4;
183    b += 4;
184  }
185#endif
186
187  /* do remaining words.  */
188  for (i = 0; i < words_by_1; i++) {
189    a->uli = *b;
190    a += 1;
191    b += 1;
192  }
193
194  /* mop up any remaining bytes.  */
195  return do_bytes_remaining (a, b, bytes, ret);
196}
197#else
198/* no HW support or unaligned lw/ld/ualw/uald instructions.  */
199static void *
200unaligned_words (reg_t * a, const reg_t * b,
201                 unsigned long words, unsigned long bytes, void *ret)
202{
203  unsigned long i;
204  unsigned char *x = (unsigned char *) a;
205
206  for (i = 0; i < words; i++) {
207    bitfields_t bw;
208    bw.v = *((reg_t*) b);
209    x = (unsigned char *) a;
210#if __mips64
211    x[0] = bw.b.B0;
212    x[1] = bw.b.B1;
213    x[2] = bw.b.B2;
214    x[3] = bw.b.B3;
215    x[4] = bw.b.B4;
216    x[5] = bw.b.B5;
217    x[6] = bw.b.B6;
218    x[7] = bw.b.B7;
219#else
220    x[0] = bw.b.B0;
221    x[1] = bw.b.B1;
222    x[2] = bw.b.B2;
223    x[3] = bw.b.B3;
224#endif
225    a += 1;
226    b += 1;
227  }
228
229  /* mop up any remaining bytes */
230  return do_bytes_remaining (a, b, bytes, ret);
231}
232#endif /* UNALIGNED_INSTR_SUPPORT */
233#endif /* HW_UNALIGNED_SUPPORT */
234
235/* both pointers are aligned, or first isn't and HW support for unaligned.  */
236static void *
237aligned_words (reg_t * a, const reg_t * b,
238               unsigned long words, unsigned long bytes, void *ret)
239{
240#if ((_MIPS_SIM == _ABIO32) || _MIPS_TUNE_I6400)
241  unsigned long i, words_by_8, words_by_1;
242  words_by_1 = words % 8;
243  words_by_8 = words >> 3;
244  for (; words_by_8 > 0; words_by_8--) {
245    if(words_by_8 != 1)
246      PREFETCH (b + 8);
247    reg_t x0 = b[0], x1 = b[1], x2 = b[2], x3 = b[3];
248    reg_t x4 = b[4], x5 = b[5], x6 = b[6], x7 = b[7];
249    a[0] = x0;
250    a[1] = x1;
251    a[2] = x2;
252    a[3] = x3;
253    a[4] = x4;
254    a[5] = x5;
255    a[6] = x6;
256    a[7] = x7;
257    a += 8;
258    b += 8;
259  }
260#else
261  unsigned long i, words_by_4, words_by_1;
262  words_by_1 = words % 4;
263  words_by_4 = words >> 2;
264  for (; words_by_4 > 0; words_by_4--) {
265    if(words_by_4 != 1)
266      PREFETCH (b + 4);
267    reg_t x0 = b[0], x1 = b[1], x2 = b[2], x3 = b[3];
268    a[0] = x0;
269    a[1] = x1;
270    a[2] = x2;
271    a[3] = x3;
272    a += 4;
273    b += 4;
274  }
275#endif
276
277  /* do remaining words.  */
278  for (i = 0; i < words_by_1; i++) {
279    *a = *b;
280    a += 1;
281    b += 1;
282  }
283
284  /* mop up any remaining bytes.  */
285  return do_bytes_remaining (a, b, bytes, ret);
286}
287
288void *
289memcpy (void *a, const void *b, size_t len) __overloadable
290{
291  unsigned long bytes, words;
292  void *ret = a;
293
294  /* shouldn't hit that often.  */
295  if (len < sizeof (reg_t) * 4) {
296    return do_bytes (a, b, len, a);
297  }
298
299  /* Align the second pointer to word/dword alignment.
300     Note that the pointer is only 32-bits for o32/n32 ABIs. For
301     n32, loads are done as 64-bit while address remains 32-bit.   */
302  bytes = ((unsigned long) b) % sizeof (reg_t);
303  if (bytes) {
304    bytes = sizeof (reg_t) - bytes;
305    if (bytes > len)
306      bytes = len;
307    do_bytes (a, b, bytes, ret);
308    if (len == bytes)
309      return ret;
310    len -= bytes;
311    a = (void *) (((unsigned char *) a) + bytes);
312    b = (const void *) (((unsigned char *) b) + bytes);
313  }
314
315  /* Second pointer now aligned.  */
316  words = len / sizeof (reg_t);
317  bytes = len % sizeof (reg_t);
318#if HW_UNALIGNED_SUPPORT
319  /* treat possible unaligned first pointer as aligned.  */
320  return aligned_words (a, b, words, bytes, ret);
321#else
322  if (((unsigned long) a) % sizeof (reg_t) == 0) {
323    return aligned_words (a, b, words, bytes, ret);
324  }
325  /* need to use unaligned instructions on first pointer.  */
326  return unaligned_words (a, b, words, bytes, ret);
327#endif
328}
329