/*
 * Copyright (c) 2017 Imagination Technologies.
 *
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *      * Redistributions of source code must retain the above copyright
 *        notice, this list of conditions and the following disclaimer.
 *      * Redistributions in binary form must reproduce the above copyright
 *        notice, this list of conditions and the following disclaimer
 *        in the documentation and/or other materials provided with
 *        the distribution.
 *      * Neither the name of Imagination Technologies nor the names of its
 *        contributors may be used to endorse or promote products derived
 *        from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <string.h>
#include <stdint.h>

#define ENABLE_PREFETCH 1

#define STRNG(X) #X

/* Issue a read prefetch for the address src_ptr + offset.  'offset' must
   be a literal because it is pasted into the instruction text.  The macro
   deliberately has no trailing semicolon so that it behaves like a single
   ordinary statement at the call site.  */
#if defined(__mips__) || defined(__mips)
#define PREFETCH(src_ptr, offset) \
  __asm__ __volatile__ ("pref 0, " STRNG(offset) "(%[src]) \n\t" \
                        : : [src] "r" (src_ptr))
#else
/* Non-MIPS build (e.g. host-side testing): use the compiler builtin.  */
#define PREFETCH(src_ptr, offset) \
  __builtin_prefetch ((const unsigned char *) (src_ptr) + (offset))
#endif

#if !defined(UNALIGNED_INSTR_SUPPORT)
/* does target have unaligned lw/ld/ualw/uald instructions? */
#define UNALIGNED_INSTR_SUPPORT 0
#if __mips_isa_rev < 6 && !__mips1
#undef UNALIGNED_INSTR_SUPPORT
#define UNALIGNED_INSTR_SUPPORT 1
#endif
#endif

#if !defined(HW_UNALIGNED_SUPPORT)
/* Does target have hardware support for unaligned accesses? */
#define HW_UNALIGNED_SUPPORT 0
#if __mips_isa_rev >= 6
#undef HW_UNALIGNED_SUPPORT
#define HW_UNALIGNED_SUPPORT 1
#endif
#endif

/* reg_t is the unit used for word-at-a-time comparison and bits_t names
   each of its bytes.  The 32-bit layout is selected only when the ABI
   macros are really defined and say o32; a bare "_MIPS_SIM == _ABIO32"
   is also true when both macros are undefined (0 == 0), which would pair
   a 4-byte bits_t with a possibly wider 'unsigned long'.  */
#if defined(_MIPS_SIM) && defined(_ABIO32) && (_MIPS_SIM == _ABIO32)
#define SIZEOF_reg_t 4
typedef unsigned long reg_t;
typedef struct bits
{
  reg_t B0:8, B1:8, B2:8, B3:8;
} bits_t;
#else
#define SIZEOF_reg_t 8
typedef unsigned long long reg_t;
typedef struct bits
{
  reg_t B0:8, B1:8, B2:8, B3:8, B4:8, B5:8, B6:8, B7:8;
} bits_t;
#endif

/* Compile-time check: bits_t must describe every byte of reg_t, otherwise
   do_by_bitfields would silently ignore part of each word.  */
typedef char reg_t_size_matches_SIZEOF_reg_t
  [(sizeof (reg_t) == SIZEOF_reg_t) ? 1 : -1];

/* This union assumes that small structures can be in registers.  If
   not, then memory accesses will be done - not optimal, but ok.  */
typedef union
{
  reg_t v;
  bits_t b;
} bitfields_t;

/* Return from the enclosing function with the difference of byte 'idx'
   of the locals x and y if those bytes differ.  Wrapped in do/while so
   that it is a single statement wherever it is used.  */
#define do_bitfield(idx) \
  do { \
    if (x.b.B##idx != y.b.B##idx) \
      return x.b.B##idx - y.b.B##idx; \
  } while (0)

/* Pull apart two words that are known to differ and return the difference
   of the first differing byte, both bytes taken as unsigned.
   NOTE(review): this relies on the compiler allocating bit-fields in
   memory order (B0 = lowest-addressed byte), which is implementation
   defined - confirm for any new compiler/target.  */
static int __attribute__ ((noinline)) do_by_bitfields (reg_t a, reg_t b)
{
  bitfields_t x, y;
  x.v = a;
  y.v = b;
  do_bitfield (0);
  do_bitfield (1);
  do_bitfield (2);
#if SIZEOF_reg_t == 4
  return x.b.B3 - y.b.B3;
#else
  do_bitfield (3);
  do_bitfield (4);
  do_bitfield (5);
  do_bitfield (6);
  return x.b.B7 - y.b.B7;
#endif
}

/* Byte-at-a-time comparison of 'len' bytes.  Used while aligning a
   pointer, for the bytes left over after the word compares, and when the
   architecture has no form of unaligned support.  Returns 0 if the ranges
   are equal, otherwise the difference of the first differing bytes.  */
static inline int __attribute__ ((always_inline))
do_bytes (const void *a, const void *b, size_t len)
{
  const unsigned char *x = (const unsigned char *) a;
  const unsigned char *y = (const unsigned char *) b;
  size_t i;

  /* 'len' might be zero here, so preloading the first two values
     before the loop may access unallocated memory.  */
  for (i = 0; i < len; i++) {
    if (*x != *y)
      return *x - *y;
    x++;
    y++;
  }
  return 0;
}

#if !HW_UNALIGNED_SUPPORT
#if UNALIGNED_INSTR_SUPPORT
/* for MIPS GCC, there are no unaligned builtins - so this struct forces
   the compiler to treat the pointer access as unaligned.  */
struct ulw
{
  reg_t uli;
} __attribute__ ((packed));

/* Compare 'words' whole words followed by 'bytes' trailing bytes.  The
   first pointer is not aligned while the second pointer is.  */
static int unaligned_words (const struct ulw *a, const reg_t *b,
                            size_t words, size_t bytes)
{
#if ENABLE_PREFETCH
  /* prefetch pointers rounded up to a 32 byte boundary */
  const reg_t *pref_ptr
    = (const reg_t *) (((uintptr_t) b + 31) & ~(uintptr_t) 31);
  const reg_t *pref_ptr_a
    = (const reg_t *) (((uintptr_t) a + 31) & ~(uintptr_t) 31);
#endif

  /* Eight words per iteration; the loop stops while at least eight words
     remain, which are then handled by the loops below.  */
  for (; words >= 16; words -= 8) {
#if ENABLE_PREFETCH
    pref_ptr += 8;
    PREFETCH(pref_ptr, 0);
    PREFETCH(pref_ptr, 32);

    pref_ptr_a += 8;
    PREFETCH(pref_ptr_a, 0);
    PREFETCH(pref_ptr_a, 32);
#endif
    reg_t x0 = a[0].uli, x1 = a[1].uli;
    reg_t x2 = a[2].uli, x3 = a[3].uli;
    reg_t y0 = b[0], y1 = b[1], y2 = b[2], y3 = b[3];
    if (x0 != y0)
      return do_by_bitfields (x0, y0);
    if (x1 != y1)
      return do_by_bitfields (x1, y1);
    if (x2 != y2)
      return do_by_bitfields (x2, y2);
    if (x3 != y3)
      return do_by_bitfields (x3, y3);

    x0 = a[4].uli; x1 = a[5].uli;
    x2 = a[6].uli; x3 = a[7].uli;
    y0 = b[4]; y1 = b[5]; y2 = b[6]; y3 = b[7];
    if (x0 != y0)
      return do_by_bitfields (x0, y0);
    if (x1 != y1)
      return do_by_bitfields (x1, y1);
    if (x2 != y2)
      return do_by_bitfields (x2, y2);
    if (x3 != y3)
      return do_by_bitfields (x3, y3);

    a += 8;
    b += 8;
  }

  /* Four words per iteration, no prefetching.  */
  for (; words >= 4; words -= 4) {
    reg_t x0 = a[0].uli, x1 = a[1].uli;
    reg_t x2 = a[2].uli, x3 = a[3].uli;
    reg_t y0 = b[0], y1 = b[1], y2 = b[2], y3 = b[3];
    if (x0 != y0)
      return do_by_bitfields (x0, y0);
    if (x1 != y1)
      return do_by_bitfields (x1, y1);
    if (x2 != y2)
      return do_by_bitfields (x2, y2);
    if (x3 != y3)
      return do_by_bitfields (x3, y3);
    a += 4;
    b += 4;
  }

  /* do remaining words.  */
  while (words--) {
    reg_t x0 = a->uli;
    reg_t y0 = *b;
    a += 1;
    b += 1;
    if (x0 != y0)
      return do_by_bitfields (x0, y0);
  }

  /* mop up any remaining bytes.  */
  return do_bytes (a, b, bytes);
}
#else
/* no HW support or unaligned lw/ld/ualw/uald instructions: compare the
   whole range one byte at a time.  */
static int unaligned_words (const reg_t *a, const reg_t *b,
                            size_t words, size_t bytes)
{
  return do_bytes (a, b, (sizeof (reg_t) * words) + bytes);
}
#endif /* UNALIGNED_INSTR_SUPPORT */
#endif /* HW_UNALIGNED_SUPPORT */

/* Compare 'words' whole words followed by 'bytes' trailing bytes.  Both
   pointers are aligned, or the first isn't and there is HW support for
   unaligned accesses.  */
static int aligned_words (const reg_t *a, const reg_t *b,
                          size_t words, size_t bytes)
{
#if ENABLE_PREFETCH
  /* prefetch pointers rounded up to a 32 byte boundary */
  const reg_t *pref_ptr
    = (const reg_t *) (((uintptr_t) b + 31) & ~(uintptr_t) 31);
  const reg_t *pref_ptr_a
    = (const reg_t *) (((uintptr_t) a + 31) & ~(uintptr_t) 31);
#endif

  /* Twelve words per iteration; the loop stops while at least twelve
     words remain, which are then handled by the loops below.  */
  for (; words >= 24; words -= 12) {
#if ENABLE_PREFETCH
    pref_ptr += 12;
    PREFETCH(pref_ptr, 0);
    PREFETCH(pref_ptr, 32);
    PREFETCH(pref_ptr, 64);

    pref_ptr_a += 12;
    PREFETCH(pref_ptr_a, 0);
    PREFETCH(pref_ptr_a, 32);
    PREFETCH(pref_ptr_a, 64);
#endif
    reg_t x0 = a[0], x1 = a[1], x2 = a[2], x3 = a[3];
    reg_t y0 = b[0], y1 = b[1], y2 = b[2], y3 = b[3];
    if (x0 != y0)
      return do_by_bitfields (x0, y0);
    if (x1 != y1)
      return do_by_bitfields (x1, y1);
    if (x2 != y2)
      return do_by_bitfields (x2, y2);
    if (x3 != y3)
      return do_by_bitfields (x3, y3);

    x0 = a[4]; x1 = a[5]; x2 = a[6]; x3 = a[7];
    y0 = b[4]; y1 = b[5]; y2 = b[6]; y3 = b[7];
    if (x0 != y0)
      return do_by_bitfields (x0, y0);
    if (x1 != y1)
      return do_by_bitfields (x1, y1);
    if (x2 != y2)
      return do_by_bitfields (x2, y2);
    if (x3 != y3)
      return do_by_bitfields (x3, y3);

    x0 = a[8]; x1 = a[9]; x2 = a[10]; x3 = a[11];
    y0 = b[8]; y1 = b[9]; y2 = b[10]; y3 = b[11];
    if (x0 != y0)
      return do_by_bitfields (x0, y0);
    if (x1 != y1)
      return do_by_bitfields (x1, y1);
    if (x2 != y2)
      return do_by_bitfields (x2, y2);
    if (x3 != y3)
      return do_by_bitfields (x3, y3);

    a += 12;
    b += 12;
  }

  /* Four words per iteration, no prefetching.  */
  for (; words >= 4; words -= 4) {
    reg_t x0 = a[0], x1 = a[1], x2 = a[2], x3 = a[3];
    reg_t y0 = b[0], y1 = b[1], y2 = b[2], y3 = b[3];
    if (x0 != y0)
      return do_by_bitfields (x0, y0);
    if (x1 != y1)
      return do_by_bitfields (x1, y1);
    if (x2 != y2)
      return do_by_bitfields (x2, y2);
    if (x3 != y3)
      return do_by_bitfields (x3, y3);
    a += 4;
    b += 4;
  }

  /* do remaining words.  */
  while (words--) {
    reg_t x0 = *a;
    reg_t y0 = *b;
    a += 1;
    b += 1;
    if (x0 != y0)
      return do_by_bitfields (x0, y0);
  }

  /* mop up any remaining bytes.  */
  return do_bytes (a, b, bytes);
}

/* Compare the first 'len' bytes of 'a' and 'b' as unsigned bytes.
   Returns 0 when they are equal, otherwise the difference (a - b) of the
   first pair of bytes that differ.  */
int memcmp (const void *a, const void *b, size_t len)
{
  size_t bytes, words;

  /* shouldn't hit that often.  */
  if (len < sizeof (reg_t) * 4) {
    return do_bytes (a, b, len);
  }

  /* Align the second pointer to word/dword alignment.
     Note that the pointer is only 32-bits for o32/n32 ABIs.  For
     n32, loads are done as 64-bit while address remains 32-bit.  */
  bytes = ((uintptr_t) b) % sizeof (reg_t);
  if (bytes) {
    int res;
    /* The prefix is shorter than one word and 'len' is at least four
       words here, so it can neither exceed nor exhaust 'len'.  */
    bytes = sizeof (reg_t) - bytes;
    res = do_bytes (a, b, bytes);
    if (res)
      return res;
    len -= bytes;
    a = (const unsigned char *) a + bytes;
    b = (const unsigned char *) b + bytes;
  }

  /* Second pointer now aligned.  */
  words = len / sizeof (reg_t);
  bytes = len % sizeof (reg_t);

#if HW_UNALIGNED_SUPPORT
  /* treat possible unaligned first pointer as aligned.  */
  return aligned_words (a, b, words, bytes);
#else
  if (((uintptr_t) a) % sizeof (reg_t) == 0) {
    return aligned_words (a, b, words, bytes);
  }
  /* need to use unaligned instructions on first pointer.  */
  return unaligned_words (a, b, words, bytes);
#endif
}