/* Cloned and hacked for uClibc by Paul Mundt, December 2003 */
/* Modified by SuperH, Inc. September 2003 */
!
! Fast SH memcpy
!
! by Toshiyasu Morita (tm@netcom.com)
! hacked by J"orn Rennecke (joern.rennecke@superh.com) ("o for o-umlaut)
! SH5 code Copyright 2002 SuperH Ltd.
!
! Entry: ARG0: destination pointer
!        ARG1: source pointer
!        ARG2: byte count
!
! Exit:  RESULT: destination pointer
!        any other registers in the range r0-r7: trashed
!
! Notes: Usually one wants to do small reads and write a longword, but
!        unfortunately it is difficult in some cases to concatenate bytes
!        into a longword on the SH, so this does a longword read and small
!        writes.
!
! This implementation makes two assumptions about how it is called:
!
! 1.: If the byte count is nonzero, the address of the last byte to be
!     copied is unsigned greater than the address of the first byte to
!     be copied.  This could be easily swapped for a signed comparison,
!     but the algorithm used needs some comparison.
!
! 2.: When there are two or three bytes in the last word of an 11-or-more
!     bytes memory chunk to be copied, the rest of the word can be read
!     without side effects.
!     This could be easily changed by increasing the minimum size of
!     a fast memcpy and the amount subtracted from r7 before L_2l_loop be 2,
!     however, this would cost a few extra cycles on average.
!     For SHmedia, the assumption is that any quadword can be read in its
!     entirety if at least one byte is included in the copy.
!

/*
 * memcpy -- SHmedia implementation.
 *
 * Register use as seen in the code below:
 *   r2  destination pointer; never written here, so it is still the
 *       destination pointer when the routine returns
 *   r3  source pointer
 *   r4  byte count
 *   r18 return address (loaded into tr1 with ptabs before the final blink)
 *
 * Registers written by this routine: r0, r1, r5-r9, r19-r25, r27, r36,
 * tr0 and tr1.
 *
 * Structure:
 *   - counts of 25 or more branch to Large;
 *   - smaller counts jump into a table of fixed-size handlers that
 *     starts at L1.  The jump offset is derived from nsb(count) scaled
 *     by 32, so every handler must start exactly 32 bytes (eight
 *     instructions) after the previous one.  Do not add, remove or
 *     reorder instructions between L1 and Large without recounting.
 */
	.section .text..SHmedia32,"ax"
	.globl	memcpy
	.type	memcpy, @function
	.align	5

memcpy:

/*
 * Unaligned quadword (8 byte) and longword (4 byte) access helpers:
 * each pairs the "lo" and "hi" partial access at offset O of pointer P.
 */
#define LDUAQ(P,O,D0,D1) ldlo.q P,O,D0; ldhi.q P,O+7,D1
#define STUAQ(P,O,D0,D1) stlo.q P,O,D0; sthi.q P,O+7,D1
#define LDUAL(P,O,D0,D1) ldlo.l P,O,D0; ldhi.l P,O+3,D1
#define STUAL(P,O,D0,D1) stlo.l P,O,D0; sthi.l P,O+3,D1

	ld.b	r3, 0, r63		/* read first source byte, result discarded in r63 */
	pta/l	Large, tr0
	movi	25, r0
	bgeu/u	r4, r0, tr0		/* count >= 25: take the Large path */
	nsb	r4, r0			/* size class of the count */
	shlli	r0, 5, r0		/* scale by the 32-byte handler size */
	movi	(L1-L0+63*32 + 1) & 0xffff, r1	/* "+ 1": presumably the SHmedia mode bit of the target */
	sub	r1, r0, r0		/* offset of the selected handler relative to L0 */
L0:	ptrel	r0, tr0
	add	r2, r4, r5		/* r5 = destination + count */
	ptabs	r18, tr1		/* tr1 = return address */
	add	r3, r4, r6		/* r6 = source + count */
	blink	tr0, r63		/* dispatch to the handler */

/* Rearranged to make cut2 safe */
	.balign	8
L4_7:	/* 4..7 byte memcpy, continued: r0 = first 4 bytes, r6/r7 = halves of last 4 bytes */
	stlo.l	r2, 0, r0
	or	r6, r7, r6
	sthi.l	r5, -1, r6
	stlo.l	r5, -4, r6
	blink	tr1, r63

	.balign	8
L1:	/* 0 byte memcpy: table slot 0, padded with nops to keep the slot layout */
	nop
	blink	tr1, r63
	nop
	nop
	nop
	nop

L2_3:	/* 2 or 3 byte memcpy, continued: store the last byte (held in r6) */
	st.b	r5, -1, r6
	blink	tr1, r63

	/* 1 byte memcpy */
	ld.b	r3, 0, r0
	st.b	r2, 0, r0
	blink	tr1, r63

L8_15:	/* 8..15 byte memcpy, continued: r0 = first 8 bytes, r6/r7 = halves of last 8 bytes */
	stlo.q	r2, 0, r0
	or	r6, r7, r6
	sthi.q	r5, -1, r6
	stlo.q	r5, -8, r6
	blink	tr1, r63

	/* 2 or 3 byte memcpy: copy bytes 0 and 1, then the last byte at L2_3 */
	ld.b	r3, 0, r0
	ld.b	r2, 0, r63		/* read first destination byte, result discarded */
	ld.b	r3, 1, r1
	st.b	r2, 0, r0
	pta/l	L2_3, tr0
	ld.b	r6, -1, r6		/* last source byte (r6 was source + count) */
	st.b	r2, 1, r1
	blink	tr0, r63

	/* 4 .. 7 byte memcpy: first and last longword, which may overlap */
	LDUAL (r3, 0, r0, r1)
	pta	L4_7, tr0
	ldlo.l	r6, -4, r7
	or	r0, r1, r0
	sthi.l	r2, 3, r0
	ldhi.l	r6, -1, r6
	blink	tr0, r63

	/* 8 .. 15 byte memcpy: first and last quadword, which may overlap */
	LDUAQ (r3, 0, r0, r1)
	pta	L8_15, tr0
	ldlo.q	r6, -8, r7
	or	r0, r1, r0
	sthi.q	r2, 7, r0
	ldhi.q	r6, -1, r6
	blink	tr0, r63

	/* 16 .. 24 byte memcpy: first two quadwords plus the last quadword */
	LDUAQ (r3, 0, r0, r1)
	LDUAQ (r3, 8, r8, r9)
	or	r0, r1, r0
	sthi.q	r2, 7, r0
	or	r8, r9, r8
	sthi.q	r2, 15, r8
	ldlo.q	r6, -8, r7
	ldhi.q	r6, -1, r6
	stlo.q	r2, 8, r8
	stlo.q	r2, 0, r0
	or	r6, r7, r6
	sthi.q	r5, -1, r6
	stlo.q	r5, -8, r6
	blink	tr1, r63

/*
 * 25 bytes or more.
 *   r6  = source - destination, so ldx.q <dst cursor>, r6 reads the
 *         matching source quadword
 *   r22 = destination cursor; r22 + r6 is the first 8-byte-aligned
 *         source address above r3
 *   r5  = destination + count - 16 (loop bound for Loop_ua)
 */
Large:
	ld.b	r2, 0, r63		/* read first destination byte, result discarded */
	pta/l	Loop_ua, tr1
	ori	r3, -8, r7		/* r7 = (source & 7) - 8 */
	sub	r2, r7, r22
	sub	r3, r2, r6
	add	r2, r4, r5
	ldlo.q	r3, 0, r0		/* leading source bytes up to the quadword boundary */
	addi	r5, -16, r5
	movi	64+8, r27		/* could subtract r7 from that. */
	stlo.q	r2, 0, r0
	sthi.q	r2, 7, r0
	ldx.q	r22, r6, r0		/* first aligned source quadword */
	bgtu/l	r27, r4, tr1		/* count < 72: skip the 32-byte-per-pass loop */

	addi	r5, -48, r27		/* r27 = destination + count - 64 (Loop_line bound) */
	pta/l	Loop_line, tr0
	addi	r6, 64, r36		/* source offset 64 bytes ahead of the cursor */
	addi	r6, -24, r19		/* source offsets of the three quadwords */
	addi	r6, -16, r20		/* that follow the one already held in r0, */
	addi	r6, -8, r21		/* relative to the advanced cursor */

/* 32 bytes per pass; r0 carries one loaded quadword into the next pass. */
Loop_line:
	ldx.q	r22, r36, r63		/* source read ahead, result discarded (presumably a prefetch) */
	alloco	r22, 32			/* presumably allocates the destination cache block */
	addi	r22, 32, r22
	ldx.q	r22, r19, r23
	sthi.q	r22, -25, r0
	ldx.q	r22, r20, r24
	ldx.q	r22, r21, r25
	stlo.q	r22, -32, r0
	ldx.q	r22, r6, r0
	sthi.q	r22, -17, r23
	sthi.q	r22, -9, r24
	sthi.q	r22, -1, r25
	stlo.q	r22, -24, r23
	stlo.q	r22, -16, r24
	stlo.q	r22, -8, r25
	bgeu	r27, r22, tr0		/* loop while cursor <= destination + count - 64 */

/* 8 bytes per pass: store the quadword held in r0, load the next one. */
Loop_ua:
	addi	r22, 8, r22
	sthi.q	r22, -1, r0
	stlo.q	r22, -8, r0
	ldx.q	r22, r6, r0
	bgtu/l	r5, r22, tr1		/* loop while cursor < destination + count - 16 */

	/* Tail: store the pending quadword, then the last 8 bytes of the buffer. */
	add	r3, r4, r7		/* r7 = source + count */
	ldlo.q	r7, -8, r1
	sthi.q	r22, 7, r0
	ldhi.q	r7, -1, r7
	ptabs	r18, tr1		/* tr1 = return address */
	stlo.q	r22, 0, r0
	or	r1, r7, r1
	sthi.q	r5, 15, r1		/* r5 + 15 = destination + count - 1 */
	stlo.q	r5, 8, r1
	blink	tr1, r63

	.size	memcpy,.-memcpy