1/*
2 * Copyright (c) 2017 Imagination Technologies.
3 *
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 *      * Redistributions of source code must retain the above copyright
11 *        notice, this list of conditions and the following disclaimer.
12 *      * Redistributions in binary form must reproduce the above copyright
13 *        notice, this list of conditions and the following disclaimer
14 *        in the documentation and/or other materials provided with
15 *        the distribution.
16 *      * Neither the name of Imagination Technologies nor the names of its
17 *        contributors may be used to endorse or promote products derived
18 *        from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33#include <string.h>
34#include <stdint.h>
35
/* Software prefetching in the word-compare loops.  Enabled by default;
   define ENABLE_PREFETCH to 0 (for example on the compiler command
   line) to build the loops without it.  */
#ifndef ENABLE_PREFETCH
#define ENABLE_PREFETCH 1
#endif

/* Turn the tokens of X into a string literal.  */
#define STRNG(X) #X

/* Emit a MIPS 'pref' instruction with hint 0 for the address
   src_ptr + offset.  'offset' is pasted into the instruction text, so
   it must be a literal constant.

   Spelled __asm__ rather than asm so the file still builds under the
   strict ISO modes (-std=c99, -std=c11), where 'asm' is not a keyword.
   The expansion deliberately has no trailing semicolon: the caller
   supplies it, so each use is exactly one statement.  */
#define PREFETCH(src_ptr, offset)  \
  __asm__ __volatile__ ("pref 0, " STRNG(offset) "(%[src]) \n\t" \
                        : : [src] "r" (src_ptr))
41
/* Does the target have unaligned lw/ld/ualw/uald instructions?
   A value supplied by the build takes precedence.  Otherwise this is 1
   when __mips_isa_rev is below 6 and __mips1 is not set, and 0 in every
   other case.  */
#ifndef UNALIGNED_INSTR_SUPPORT
#if __mips_isa_rev < 6 && !__mips1
#define UNALIGNED_INSTR_SUPPORT 1
#else
#define UNALIGNED_INSTR_SUPPORT 0
#endif
#endif
50
/* Does the target handle unaligned accesses in hardware?
   A value supplied by the build takes precedence.  Otherwise this is 1
   when __mips_isa_rev is 6 or higher, and 0 in every other case.  */
#ifndef HW_UNALIGNED_SUPPORT
#if __mips_isa_rev >= 6
#define HW_UNALIGNED_SUPPORT 1
#else
#define HW_UNALIGNED_SUPPORT 0
#endif
#endif
59
/* reg_t is the word type used by the comparison loops, SIZEOF_reg_t is
   its size as a preprocessor constant, and bits_t splits one word into
   8-bit lanes B0, B1, ...  */
#if _MIPS_SIM == _ABIO32
/* o32: four byte lanes.  */
#define SIZEOF_reg_t 4
typedef unsigned long reg_t;
typedef struct bits
{
  reg_t B0:8;
  reg_t B1:8;
  reg_t B2:8;
  reg_t B3:8;
} bits_t;
#else
/* Any other ABI: eight byte lanes.  */
#define SIZEOF_reg_t 8
typedef unsigned long long reg_t;
typedef struct bits
{
  reg_t B0:8;
  reg_t B1:8;
  reg_t B2:8;
  reg_t B3:8;
  reg_t B4:8;
  reg_t B5:8;
  reg_t B6:8;
  reg_t B7:8;
} bits_t;
#endif

/* View of one word either as a whole value (v) or as its byte lanes
   (b).  The design assumes a structure this small can live in a
   register; if the compiler spills it to memory instead, the result is
   slower but still correct.  */
typedef union bitfields
{
  reg_t v;
  bits_t b;
} bitfields_t;
84
85#define do_bitfield(__i) \
86  if (x.b.B##__i != y.b.B##__i) return x.b.B##__i - y.b.B##__i;
87
88/* pull apart the words to find the first differing unsigned byte.  */
89static int __attribute__ ((noinline)) do_by_bitfields (reg_t a, reg_t b)
90{
91  bitfields_t x, y;
92  x.v = a;
93  y.v = b;
94  do_bitfield (0);
95  do_bitfield (1);
96  do_bitfield (2);
97#if SIZEOF_reg_t == 4
98  return x.b.B3 - y.b.B3;
99#else
100  do_bitfield (3);
101  do_bitfield (4);
102  do_bitfield (5);
103  do_bitfield (6);
104  return x.b.B7 - y.b.B7;
105#endif
106}
107
/* Byte-at-a-time comparison of the first 'len' bytes of 'a' and 'b'.
   Used while aligning a pointer, for the bytes left over after the
   word compares, and when the architecture has no form of unaligned
   support.

   Returns 0 if all 'len' bytes match (including len == 0); otherwise
   the difference between the first pair of differing bytes, taken as
   unsigned chars (byte from 'a' minus byte from 'b').  */
static inline int __attribute__ ((always_inline))
do_bytes (const void *a, const void *b, unsigned long len)
{
  /* Keep the const qualifier: the buffers are only ever read.  */
  const unsigned char *x = a;
  const unsigned char *y = b;

  /* 'len' may be zero, so nothing is loaded before the loop test;
     reading ahead of it could touch unallocated memory.  */
  for (; len != 0; len--, x++, y++) {
    if (*x != *y)
      return *x - *y;
  }
  return 0;
}
128
129#if !HW_UNALIGNED_SUPPORT
130#if UNALIGNED_INSTR_SUPPORT
131/* for MIPS GCC, there are no unaligned builtins - so this struct forces
132   the compiler to treat the pointer access as unaligned.  */
133struct ulw
134{
135  reg_t uli;
136} __attribute__ ((packed));
137
138/* first pointer is not aligned while second pointer is.  */
139static int unaligned_words (const struct ulw *a, const reg_t *b,
140                            unsigned long words, unsigned long bytes)
141{
142#if ENABLE_PREFETCH
143  /* prefetch pointer aligned to 32 byte boundary */
144  const reg_t *pref_ptr = (const reg_t *) (((uintptr_t) b + 31) & ~31);
145  const reg_t *pref_ptr_a = (const reg_t *) (((uintptr_t) a + 31) & ~31);
146#endif
147  for (; words >= 16; words -= 8) {
148#if ENABLE_PREFETCH
149    pref_ptr += 8;
150    PREFETCH(pref_ptr, 0);
151    PREFETCH(pref_ptr, 32);
152
153    pref_ptr_a += 8;
154    PREFETCH(pref_ptr_a, 0);
155    PREFETCH(pref_ptr_a, 32);
156#endif
157    reg_t x0 = a[0].uli, x1 = a[1].uli;
158    reg_t x2 = a[2].uli, x3 = a[3].uli;
159    reg_t y0 = b[0], y1 = b[1], y2 = b[2], y3 = b[3];
160    if (x0 != y0)
161      return do_by_bitfields (x0, y0);
162    if (x1 != y1)
163      return do_by_bitfields (x1, y1);
164    if (x2 != y2)
165      return do_by_bitfields (x2, y2);
166    if (x3 != y3)
167      return do_by_bitfields (x3, y3);
168
169    x0 = a[4].uli; x1 = a[5].uli;
170    x2 = a[6].uli; x3 = a[7].uli;
171    y0 = b[4]; y1 = b[5]; y2 = b[6]; y3 = b[7];
172    if (x0 != y0)
173      return do_by_bitfields (x0, y0);
174    if (x1 != y1)
175      return do_by_bitfields (x1, y1);
176    if (x2 != y2)
177      return do_by_bitfields (x2, y2);
178    if (x3 != y3)
179      return do_by_bitfields (x3, y3);
180
181    a += 8;
182    b += 8;
183  }
184
185  for (; words >= 4; words -= 4) {
186    reg_t x0 = a[0].uli, x1 = a[1].uli;
187    reg_t x2 = a[2].uli, x3 = a[3].uli;
188    reg_t y0 = b[0], y1 = b[1], y2 = b[2], y3 = b[3];
189    if (x0 != y0)
190      return do_by_bitfields (x0, y0);
191    if (x1 != y1)
192      return do_by_bitfields (x1, y1);
193    if (x2 != y2)
194      return do_by_bitfields (x2, y2);
195    if (x3 != y3)
196      return do_by_bitfields (x3, y3);
197    a += 4;
198    b += 4;
199  }
200
201  /* do remaining words.  */
202  while (words--) {
203    reg_t x0 = a->uli;
204    reg_t y0 = *b;
205    a += 1;
206    b += 1;
207    if (x0 != y0)
208      return do_by_bitfields (x0, y0);
209  }
210
211  /* mop up any remaining bytes.  */
212  return do_bytes (a, b, bytes);
213}
214#else
215/* no HW support or unaligned lw/ld/ualw/uald instructions.  */
216static int unaligned_words (const reg_t *a, const reg_t *b,
217                            unsigned long words, unsigned long bytes)
218{
219  return do_bytes (a, b, (sizeof (reg_t) * words) + bytes);
220}
221#endif /* UNALIGNED_INSTR_SUPPORT */
222#endif /* HW_UNALIGNED_SUPPORT */
223
224/* both pointers are aligned, or first isn't and HW support for unaligned.  */
225static int aligned_words (const reg_t *a, const reg_t *b,
226                          unsigned long words, unsigned long bytes)
227{
228#if ENABLE_PREFETCH
229  /* prefetch pointer aligned to 32 byte boundary */
230  const reg_t *pref_ptr = (const reg_t *) (((uintptr_t) b + 31) & ~31);
231  const reg_t *pref_ptr_a = (const reg_t *) (((uintptr_t) a + 31) & ~31);
232#endif
233
234  for (; words >= 24; words -= 12) {
235#if ENABLE_PREFETCH
236    pref_ptr += 12;
237    PREFETCH(pref_ptr, 0);
238    PREFETCH(pref_ptr, 32);
239    PREFETCH(pref_ptr, 64);
240
241    pref_ptr_a += 12;
242    PREFETCH(pref_ptr_a, 0);
243    PREFETCH(pref_ptr_a, 32);
244    PREFETCH(pref_ptr_a, 64);
245#endif
246    reg_t x0 = a[0], x1 = a[1], x2 = a[2], x3 = a[3];
247    reg_t y0 = b[0], y1 = b[1], y2 = b[2], y3 = b[3];
248    if (x0 != y0)
249      return do_by_bitfields (x0, y0);
250    if (x1 != y1)
251      return do_by_bitfields (x1, y1);
252    if (x2 != y2)
253      return do_by_bitfields (x2, y2);
254    if (x3 != y3)
255      return do_by_bitfields (x3, y3);
256
257    x0 = a[4]; x1 = a[5]; x2 = a[6]; x3 = a[7];
258    y0 = b[4]; y1 = b[5]; y2 = b[6]; y3 = b[7];
259    if (x0 != y0)
260      return do_by_bitfields (x0, y0);
261    if (x1 != y1)
262      return do_by_bitfields (x1, y1);
263    if (x2 != y2)
264      return do_by_bitfields (x2, y2);
265    if (x3 != y3)
266      return do_by_bitfields (x3, y3);
267
268    x0 = a[8]; x1 = a[9]; x2 = a[10]; x3 = a[11];
269    y0 = b[8]; y1 = b[9]; y2 = b[10]; y3 = b[11];
270    if (x0 != y0)
271      return do_by_bitfields (x0, y0);
272    if (x1 != y1)
273      return do_by_bitfields (x1, y1);
274    if (x2 != y2)
275      return do_by_bitfields (x2, y2);
276    if (x3 != y3)
277      return do_by_bitfields (x3, y3);
278
279    a += 12;
280    b += 12;
281  }
282
283  for (; words >= 4; words -= 4) {
284    reg_t x0 = a[0], x1 = a[1], x2 = a[2], x3 = a[3];
285    reg_t y0 = b[0], y1 = b[1], y2 = b[2], y3 = b[3];
286    if (x0 != y0)
287      return do_by_bitfields (x0, y0);
288    if (x1 != y1)
289      return do_by_bitfields (x1, y1);
290    if (x2 != y2)
291      return do_by_bitfields (x2, y2);
292    if (x3 != y3)
293      return do_by_bitfields (x3, y3);
294    a += 4;
295    b += 4;
296  }
297
298  /* do remaining words.  */
299  while (words--) {
300    reg_t x0 = *a;
301    reg_t y0 = *b;
302    a += 1;
303    b += 1;
304    if (x0 != y0)
305      return do_by_bitfields (x0, y0);
306  }
307
308  /* mop up any remaining bytes.  */
309  return do_bytes (a, b, bytes);
310}
311
312int memcmp (const void *a, const void *b, size_t len)
313{
314  unsigned long bytes, words;
315
316  /* shouldn't hit that often.  */
317  if (len < sizeof (reg_t) * 4) {
318    return do_bytes (a, b, len);
319  }
320
321  /* Align the second pointer to word/dword alignment.
322     Note that the pointer is only 32-bits for o32/n32 ABIs. For
323     n32, loads are done as 64-bit while address remains 32-bit.   */
324  bytes = ((unsigned long) b) % sizeof (reg_t);
325  if (bytes) {
326    int res;
327    bytes = sizeof (reg_t) - bytes;
328    if (bytes > len)
329      bytes = len;
330    res = do_bytes (a, b, bytes);
331    if (res || len == bytes)
332      return res;
333    len -= bytes;
334    a = (const void *) (((unsigned char *) a) + bytes);
335    b = (const void *) (((unsigned char *) b) + bytes);
336  }
337
338  /* Second pointer now aligned.  */
339  words = len / sizeof (reg_t);
340  bytes = len % sizeof (reg_t);
341
342#if HW_UNALIGNED_SUPPORT
343  /* treat possible unaligned first pointer as aligned.  */
344  return aligned_words (a, b, words, bytes);
345#else
346  if (((unsigned long) a) % sizeof (reg_t) == 0) {
347    return aligned_words (a, b, words, bytes);
348  }
349  /* need to use unaligned instructions on first pointer.  */
350  return unaligned_words (a, b, words, bytes);
351#endif
352}
353