/*
 * Copyright (c) 2012
 *      MIPS Technologies, Inc., California.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
 *    contributors may be used to endorse or promote products derived from
 *    this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "pixman-mips-dspr2-asm.h"

/*
 * This routine could be optimized for MIPS64. The current code only
 * uses MIPS32 instructions.
 */

/*
 * Endian-neutral names for the partial-word load/store instructions.
 * "HI" is the access issued at the lowest byte address of an unaligned
 * word (offset 0), "LO" the one issued at its highest byte address
 * (offset 3).
 */
#ifdef EB
#  define LWHI    lwl     /* high part is left in big-endian */
#  define SWHI    swl     /* high part is left in big-endian */
#  define LWLO    lwr     /* low part is right in big-endian */
#  define SWLO    swr     /* low part is right in big-endian */
#else
#  define LWHI    lwr     /* high part is right in little-endian */
#  define SWHI    swr     /* high part is right in little-endian */
#  define LWLO    lwl     /* low part is left in little-endian */
#  define SWLO    swl     /* low part is left in little-endian */
#endif

/*
 * pixman_mips_fast_memcpy - forward byte copy, memcpy-style.
 *
 * In:
 *   a0 = dst
 *   a1 = src
 *   a2 = number of bytes to copy
 * Out:
 *   v0 = original dst
 * Clobbers:
 *   AT, v1, a0-a3, t0-t9
 *
 * Strategy:
 *   - fewer than 8 bytes: plain byte loop ($last8);
 *   - src and dst share the same alignment within a word: align dst with
 *     one partial-word store, then copy 64-byte chunks, one optional
 *     32-byte chunk, whole words, and finally trailing bytes;
 *   - otherwise ($unaligned): same structure, but every source word is
 *     assembled with an LWHI/LWLO pair while stores to dst stay aligned.
 *
 * Notes:
 *   - The copy always runs from low to high addresses.
 *   - The length tests (slti, blez) are signed, so a count with bit 31
 *     set takes the short path and copies nothing.
 *   - The instruction following each branch/jump is written as its delay
 *     slot; this assumes LEAF_MIPS32R2 (from the header above) selects
 *     ".set noreorder" -- NOTE(review): confirm against the header.
 *   - "pref 0" is a load prefetch of the source; "pref 30" prepares a
 *     destination cache line for store and is only issued for lines that
 *     lie completely inside the destination buffer (see t9 below).
 */
LEAF_MIPS32R2(pixman_mips_fast_memcpy)

    slti    AT, a2, 8
    bne     AT, zero, $last8
    move    v0, a0              /* (delay slot) memcpy returns the dst pointer */

/* Test if the src and dst are word-aligned, or can be made word-aligned */
    xor     t8, a1, a0
    andi    t8, t8, 0x3         /* t8 is a0/a1 word-displacement */

    bne     t8, zero, $unaligned
    negu    a3, a0              /* (delay slot) also consumed by $unaligned */

    andi    a3, a3, 0x3         /* we need to copy a3 bytes to make a0/a1 aligned */
    beq     a3, zero, $chk16w   /* when a3=0 then the dst (a0) is word-aligned */
    subu    a2, a2, a3          /* (delay slot) now a2 is the remaining byte count */

    /* Copy the 1..3 leading bytes with a single partial-word load/store. */
    LWHI    t8, 0(a1)
    addu    a1, a1, a3
    SWHI    t8, 0(a0)
    addu    a0, a0, a3

/* Now the dst/src are mutually word-aligned with word-aligned addresses */
$chk16w:
    andi    t8, a2, 0x3f        /* any whole 64-byte chunks? */
                                /* t8 is the byte count after 64-byte chunks */

    beq     a2, t8, $chk8w      /* if a2==t8, no 64-byte chunks */
                                /* There will be at most 1 32-byte chunk after it */
    subu    a3, a2, t8          /* (delay slot) subtract the remainder from a2 */
                                /* Here a3 counts bytes in 16w chunks */
    addu    a3, a0, a3          /* Now a3 is the final dst after 64-byte chunks */

    addu    t0, a0, a2          /* t0 is the "past the end" address */

/*
 * When in the loop we exercise "pref 30, x(a0)", the a0+x should not be past
 * the "t0-32" address
 * This means: for x=128 the last "safe" a0 address is "t0-160"
 * Alternatively, for x=64 the last "safe" a0 address is "t0-96"
 * In the current version we use "pref 30, 128(a0)", so "t0-160" is the limit
 */
    subu    t9, t0, 160         /* t9 is the "last safe pref 30, 128(a0)" address */

    pref    0, 0(a1)            /* bring the first line of src, addr 0 */
    pref    0, 32(a1)           /* bring the second line of src, addr 32 */
    pref    0, 64(a1)           /* bring the third line of src, addr 64 */
    pref    30, 32(a0)          /* safe, as we have at least 64 bytes ahead */
/* In case the a0 > t9 don't use "pref 30" at all */
    sgtu    v1, a0, t9          /* v1 = 1 when dst prefetching must be skipped */
    bgtz    v1, $loop16w        /* skip "pref 30, 64(a0)" for too short arrays */
    nop
/* otherwise, start with using pref30 */
    pref    30, 64(a0)

/* Main aligned loop: 64 bytes per iteration, as two 8-word halves. */
$loop16w:
    pref    0, 96(a1)
    lw      t0, 0(a1)
    bgtz    v1, $skip_pref30_96 /* skip "pref 30, 96(a0)" */
    lw      t1, 4(a1)           /* (delay slot) */
    pref    30, 96(a0)          /* continue setting up the dest, addr 96 */
$skip_pref30_96:
    lw      t2, 8(a1)
    lw      t3, 12(a1)
    lw      t4, 16(a1)
    lw      t5, 20(a1)
    lw      t6, 24(a1)
    lw      t7, 28(a1)
    pref    0, 128(a1)          /* bring the next lines of src, addr 128 */

    sw      t0, 0(a0)
    sw      t1, 4(a0)
    sw      t2, 8(a0)
    sw      t3, 12(a0)
    sw      t4, 16(a0)
    sw      t5, 20(a0)
    sw      t6, 24(a0)
    sw      t7, 28(a0)

    lw      t0, 32(a1)
    bgtz    v1, $skip_pref30_128 /* skip "pref 30, 128(a0)" */
    lw      t1, 36(a1)          /* (delay slot) */
    pref    30, 128(a0)         /* continue setting up the dest, addr 128 */
$skip_pref30_128:
    lw      t2, 40(a1)
    lw      t3, 44(a1)
    lw      t4, 48(a1)
    lw      t5, 52(a1)
    lw      t6, 56(a1)
    lw      t7, 60(a1)
    pref    0, 160(a1)          /* bring the next lines of src, addr 160 */

    sw      t0, 32(a0)
    sw      t1, 36(a0)
    sw      t2, 40(a0)
    sw      t3, 44(a0)
    sw      t4, 48(a0)
    sw      t5, 52(a0)
    sw      t6, 56(a0)
    sw      t7, 60(a0)

    addiu   a0, a0, 64          /* adding 64 to dest */
    sgtu    v1, a0, t9          /* re-evaluate the "pref 30" guard */
    bne     a0, a3, $loop16w
    addiu   a1, a1, 64          /* (delay slot) adding 64 to src */
    move    a2, t8              /* a2 = bytes left after the 64-byte chunks */

/* Here we have src and dest word-aligned but less than 64-bytes to go */

$chk8w:
    pref    0, 0x0(a1)
    andi    t8, a2, 0x1f        /* is there a 32-byte chunk? */
                                /* the t8 is the remainder count past 32-bytes */
    beq     a2, t8, $chk1w      /* when a2=t8, no 32-byte chunk */
    nop

    lw      t0, 0(a1)
    lw      t1, 4(a1)
    lw      t2, 8(a1)
    lw      t3, 12(a1)
    lw      t4, 16(a1)
    lw      t5, 20(a1)
    lw      t6, 24(a1)
    lw      t7, 28(a1)
    addiu   a1, a1, 32

    sw      t0, 0(a0)
    sw      t1, 4(a0)
    sw      t2, 8(a0)
    sw      t3, 12(a0)
    sw      t4, 16(a0)
    sw      t5, 20(a0)
    sw      t6, 24(a0)
    sw      t7, 28(a0)
    addiu   a0, a0, 32

$chk1w:
    andi    a2, t8, 0x3         /* now a2 is the remainder past 1w chunks */
    beq     a2, t8, $last8      /* fewer than 4 bytes left: no whole word */
    subu    a3, t8, a2          /* (delay slot) a3 is count of bytes in 1w chunks */
    addu    a3, a0, a3          /* now a3 is the dst address past the 1w chunks */

/* copying in words (4-byte chunks) */
$wordCopy_loop:
    lw      t3, 0(a1)           /* the first t3 may be equal t0 ... optimize? */
    addiu   a1, a1, 4
    addiu   a0, a0, 4
    bne     a0, a3, $wordCopy_loop
    sw      t3, -4(a0)          /* (delay slot) a0 was already advanced */

/* For the last (<8) bytes */
$last8:
    blez    a2, $leave          /* nothing (or a "negative" count) left */
    addu    a3, a0, a2          /* (delay slot) a3 is one past the last dst byte */
$last8loop:
    lb      v1, 0(a1)
    addiu   a1, a1, 1
    addiu   a0, a0, 1
    bne     a0, a3, $last8loop
    sb      v1, -1(a0)          /* (delay slot) a0 was already advanced */

$leave:
    j       ra
    nop

/*
 * UNALIGNED case: src and dst have different alignment within a word, so
 * only dst can be word-aligned; src words are read with LWHI/LWLO pairs.
 */

$unaligned:
    /* got here with a3="negu a0" */
    andi    a3, a3, 0x3         /* test if the a0 is word aligned */
    beqz    a3, $ua_chk16w
    subu    a2, a2, a3          /* (delay slot) bytes left after initial a3 bytes */

    LWHI    v1, 0(a1)
    LWLO    v1, 3(a1)
    addu    a1, a1, a3          /* a3 may be here 1, 2 or 3 */
    SWHI    v1, 0(a0)
    addu    a0, a0, a3          /* below the dst will be word aligned (NOTE1) */

$ua_chk16w:
    andi    t8, a2, 0x3f        /* any whole 64-byte chunks? */
                                /* t8 is the byte count after 64-byte chunks */
    beq     a2, t8, $ua_chk8w   /* if a2==t8, no 64-byte chunks */
                                /* There will be at most 1 32-byte chunk after it */
    subu    a3, a2, t8          /* (delay slot) subtract the remainder from a2 */
                                /* Here a3 counts bytes in 16w chunks */
    addu    a3, a0, a3          /* Now a3 is the final dst after 64-byte chunks */

    addu    t0, a0, a2          /* t0 is the "past the end" address */

    subu    t9, t0, 160         /* t9 is the "last safe pref 30, 128(a0)" address */

    pref    0, 0(a1)            /* bring the first line of src, addr 0 */
    pref    0, 32(a1)           /* bring the second line of src, addr 32 */
    pref    0, 64(a1)           /* bring the third line of src, addr 64 */
    pref    30, 32(a0)          /* safe, as we have at least 64 bytes ahead */
/* In case the a0 > t9 don't use "pref 30" at all */
    sgtu    v1, a0, t9          /* v1 = 1 when dst prefetching must be skipped */
    bgtz    v1, $ua_loop16w     /* skip "pref 30, 64(a0)" for too short arrays */
    nop
/* otherwise, start with using pref30 */
    pref    30, 64(a0)

/* Main unaligned loop: 64 bytes per iteration, as two 8-word halves. */
$ua_loop16w:
    pref    0, 96(a1)
    LWHI    t0, 0(a1)
    LWLO    t0, 3(a1)
    LWHI    t1, 4(a1)
    bgtz    v1, $ua_skip_pref30_96
    LWLO    t1, 7(a1)           /* (delay slot) */
    pref    30, 96(a0)          /* continue setting up the dest, addr 96 */
$ua_skip_pref30_96:
    LWHI    t2, 8(a1)
    LWLO    t2, 11(a1)
    LWHI    t3, 12(a1)
    LWLO    t3, 15(a1)
    LWHI    t4, 16(a1)
    LWLO    t4, 19(a1)
    LWHI    t5, 20(a1)
    LWLO    t5, 23(a1)
    LWHI    t6, 24(a1)
    LWLO    t6, 27(a1)
    LWHI    t7, 28(a1)
    LWLO    t7, 31(a1)
    pref    0, 128(a1)          /* bring the next lines of src, addr 128 */

    sw      t0, 0(a0)
    sw      t1, 4(a0)
    sw      t2, 8(a0)
    sw      t3, 12(a0)
    sw      t4, 16(a0)
    sw      t5, 20(a0)
    sw      t6, 24(a0)
    sw      t7, 28(a0)

    LWHI    t0, 32(a1)
    LWLO    t0, 35(a1)
    LWHI    t1, 36(a1)
    bgtz    v1, $ua_skip_pref30_128
    LWLO    t1, 39(a1)          /* (delay slot) */
    pref    30, 128(a0)         /* continue setting up the dest, addr 128 */
$ua_skip_pref30_128:
    LWHI    t2, 40(a1)
    LWLO    t2, 43(a1)
    LWHI    t3, 44(a1)
    LWLO    t3, 47(a1)
    LWHI    t4, 48(a1)
    LWLO    t4, 51(a1)
    LWHI    t5, 52(a1)
    LWLO    t5, 55(a1)
    LWHI    t6, 56(a1)
    LWLO    t6, 59(a1)
    LWHI    t7, 60(a1)
    LWLO    t7, 63(a1)
    pref    0, 160(a1)          /* bring the next lines of src, addr 160 */

    sw      t0, 32(a0)
    sw      t1, 36(a0)
    sw      t2, 40(a0)
    sw      t3, 44(a0)
    sw      t4, 48(a0)
    sw      t5, 52(a0)
    sw      t6, 56(a0)
    sw      t7, 60(a0)

    addiu   a0, a0, 64          /* adding 64 to dest */
    sgtu    v1, a0, t9          /* re-evaluate the "pref 30" guard */
    bne     a0, a3, $ua_loop16w
    addiu   a1, a1, 64          /* (delay slot) adding 64 to src */
    move    a2, t8              /* a2 = bytes left after the 64-byte chunks */

/* Here dest is word-aligned (src is not) and less than 64 bytes remain */

$ua_chk8w:
    pref    0, 0x0(a1)
    andi    t8, a2, 0x1f        /* is there a 32-byte chunk? */
                                /* the t8 is the remainder count */
    beq     a2, t8, $ua_chk1w   /* when a2=t8, no 32-byte chunk */

    /*
     * No nop here: the first LWHI below sits in the branch delay slot and
     * therefore also executes when the branch is taken. Its result (t0) is
     * not read on the $ua_chk1w path.
     */
    LWHI    t0, 0(a1)
    LWLO    t0, 3(a1)
    LWHI    t1, 4(a1)
    LWLO    t1, 7(a1)
    LWHI    t2, 8(a1)
    LWLO    t2, 11(a1)
    LWHI    t3, 12(a1)
    LWLO    t3, 15(a1)
    LWHI    t4, 16(a1)
    LWLO    t4, 19(a1)
    LWHI    t5, 20(a1)
    LWLO    t5, 23(a1)
    LWHI    t6, 24(a1)
    LWLO    t6, 27(a1)
    LWHI    t7, 28(a1)
    LWLO    t7, 31(a1)
    addiu   a1, a1, 32

    sw      t0, 0(a0)
    sw      t1, 4(a0)
    sw      t2, 8(a0)
    sw      t3, 12(a0)
    sw      t4, 16(a0)
    sw      t5, 20(a0)
    sw      t6, 24(a0)
    sw      t7, 28(a0)
    addiu   a0, a0, 32

$ua_chk1w:
    andi    a2, t8, 0x3         /* now a2 is the remainder past 1w chunks */
    beq     a2, t8, $ua_smallCopy /* fewer than 4 bytes left: no whole word */
    subu    a3, t8, a2          /* (delay slot) a3 is count of bytes in 1w chunks */
    addu    a3, a0, a3          /* now a3 is the dst address past the 1w chunks */

/* copying in words (4-byte chunks) */
$ua_wordCopy_loop:
    LWHI    v1, 0(a1)
    LWLO    v1, 3(a1)
    addiu   a1, a1, 4
    addiu   a0, a0, 4           /* note: dst=a0 is word aligned here, see NOTE1 */
    bne     a0, a3, $ua_wordCopy_loop
    sw      v1, -4(a0)          /* (delay slot) a0 was already advanced */

/* Now less than 4 bytes (value in a2) left to copy */
$ua_smallCopy:
    beqz    a2, $leave
    addu    a3, a0, a2          /* (delay slot) a3 is one past the last dst byte */
$ua_smallCopy_loop:
    lb      v1, 0(a1)
    addiu   a1, a1, 1
    addiu   a0, a0, 1
    bne     a0, a3, $ua_smallCopy_loop
    sb      v1, -1(a0)          /* (delay slot) a0 was already advanced */

    j       ra
    nop

END(pixman_mips_fast_memcpy)