/***************************************************************************
 Copyright (c) 2009-2013 The Linux Foundation. All rights reserved.

 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:
     * Redistributions of source code must retain the above copyright
       notice, this list of conditions and the following disclaimer.
     * Redistributions in binary form must reproduce the above copyright
       notice, this list of conditions and the following disclaimer in the
       documentation and/or other materials provided with the distribution.
     * Neither the name of The Linux Foundation nor the names of its contributors may
       be used to endorse or promote products derived from this software
       without specific prior written permission.

 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***************************************************************************/

/* Assumes neon instructions and a cache line size of 64 bytes.
*/

/*
 * Register usage, as established by the instructions below:
 *   r0  - destination pointer, advanced by every store
 *   r1  - source pointer, advanced by every load
 *   r2  - byte count
 *   r3  - scratch (target of the look-ahead loads, sub-word copies)
 *   r9  - iteration count of the double-preload loop (saved/restored here)
 *   r10 - look-ahead address used for preloading (saved/restored here)
 *   r12 - number of 64-byte blocks left to copy in the current loop
 *   lr  - scratch: look-ahead distance in bytes
 *   q0-q3, q8-q11 - data in flight
 *
 * NOTE(review): this fragment only defines local (.L) labels, overwrites lr,
 * and leaves through "pop {r0, pc}".  Presumably it is included into a
 * function that has already pushed {r0, lr} -- confirm against the includer.
 */

/* Look-ahead distance, in units of PLDSIZE bytes. */
#define PLDOFFS         (10)
/* Copies of at most this many 64-byte blocks do no preloading at all. */
#define PLDTHRESH       (PLDOFFS)
/* Copies of at most this many 64-byte blocks use the fixed look-ahead. */
#define BBTHRESH        (4096/64)
/* Bytes covered by one preload (one block per loop iteration). */
#define PLDSIZE         (64)

#if (PLDOFFS < 1)
#error Routine does not support offsets less than 1
#endif

#if (PLDTHRESH < PLDOFFS)
#error PLD threshold must be greater than or equal to the PLD offset
#endif

        .text
        .fpu    neon

.L_memcpy_base:
        /* Dispatch on the byte count; anything below 64 goes to the tail. */
        cmp     r2, #4
        blt     .L_neon_lt4
        cmp     r2, #16
        blt     .L_neon_lt16
        cmp     r2, #32
        blt     .L_neon_16              /* N is set here, so the bpl there falls through */
        cmp     r2, #64
        blt     .L_neon_copy_32_a

        mov     r12, r2, lsr #6         /* r12 = number of whole 64-byte blocks */
        cmp     r12, #PLDTHRESH
        ble     .L_neon_copy_64_loop_nopld

        push    {r9, r10}
        .cfi_adjust_cfa_offset 8
        .cfi_rel_offset r9, 0
        .cfi_rel_offset r10, 4

        cmp     r12, #BBTHRESH
        ble     .L_neon_prime_pump

        /*
         * lr = ((dst + 0x400 - (src + PLDOFFS*PLDSIZE)) & 0x7FF)
         *      + PLDOFFS*PLDSIZE
         * The shift pair keeps the low 11 bits of the difference.
         * NOTE(review): the reason for deriving the look-ahead distance from
         * the destination/source offset is not stated in this file -- confirm.
         */
        add     lr, r0, #0x400
        add     r9, r1, #(PLDOFFS*PLDSIZE)
        sub     lr, lr, r9
        lsl     lr, lr, #21
        lsr     lr, lr, #21
        add     lr, lr, #(PLDOFFS*PLDSIZE)
        cmp     r12, lr, lsr #6         /* copy no longer than the look-ahead? */
        ble     .L_neon_prime_pump

        /* r9 = lr/64 - PLDOFFS; fall back to the fixed look-ahead if <= 0. */
        itt     gt
        movgt   r9, #(PLDOFFS)
        rsbsgt  r9, r9, lr, lsr #6
        ble     .L_neon_prime_pump

        add     r10, r1, lr             /* r10 = look-ahead address ...       */
        bic     r10, #0x3F              /* ... rounded down to 64 bytes       */

        sub     r12, r12, lr, lsr #6    /* hold back lr/64 blocks for the no-preload loop */

        /* r9 = min(r9, r12); r12 = blocks left after the double-preload loop. */
        cmp     r9, r12
        itee    le
        suble   r12, r12, r9
        movgt   r9, r12
        movgt   r12, #0

        pld     [r1, #((PLDOFFS-1)*PLDSIZE)]
.L_neon_copy_64_loop_outer_doublepld:
        /* r9 iterations: pld at the near offset and load from r10 (far). */
        pld     [r1, #((PLDOFFS)*PLDSIZE)]
        vld1.32 {q0, q1}, [r1]!
        vld1.32 {q2, q3}, [r1]!
        ldr     r3, [r10]               /* value unused; the load touches the look-ahead line */
        subs    r9, r9, #1
        vst1.32 {q0, q1}, [r0]!         /* NEON stores and the add leave the flags alone */
        vst1.32 {q2, q3}, [r0]!
        add     r10, #64
        bne     .L_neon_copy_64_loop_outer_doublepld
        cmp     r12, #0
        beq     .L_neon_pop_before_nopld

        /* Fewer than 512 KiB worth of blocks left: look ahead with ldr. */
        cmp     r12, #(512*1024/64)
        blt     .L_neon_copy_64_loop_outer

.L_neon_copy_64_loop_ddr:
        /* Same copy loop, but the look-ahead uses pld instead of ldr. */
        vld1.32 {q0, q1}, [r1]!
        vld1.32 {q2, q3}, [r1]!
        pld     [r10]
        subs    r12, r12, #1
        vst1.32 {q0, q1}, [r0]!
        vst1.32 {q2, q3}, [r0]!
        add     r10, #64
        bne     .L_neon_copy_64_loop_ddr
        b       .L_neon_pop_before_nopld

.L_neon_prime_pump:
        /* Fixed look-ahead of PLDOFFS blocks. */
        mov     lr, #(PLDOFFS*PLDSIZE)
        add     r10, r1, #(PLDOFFS*PLDSIZE)
        bic     r10, #0x3F
        sub     r12, r12, #PLDOFFS      /* hold back PLDOFFS blocks for the no-preload loop */
        ldr     r3, [r10, #(-1*PLDSIZE)]

.L_neon_copy_64_loop_outer:
        /* r12 iterations: copy 64 bytes and load from the look-ahead address. */
        vld1.32 {q0, q1}, [r1]!
        vld1.32 {q2, q3}, [r1]!
        ldr     r3, [r10]
        subs    r12, r12, #1
        vst1.32 {q0, q1}, [r0]!
        vst1.32 {q2, q3}, [r0]!
        add     r10, #64
        bne     .L_neon_copy_64_loop_outer

.L_neon_pop_before_nopld:
        mov     r12, lr, lsr #6         /* the held-back blocks: lr/64 */
        pop     {r9, r10}
        .cfi_adjust_cfa_offset -8
        .cfi_restore r9
        .cfi_restore r10

.L_neon_copy_64_loop_nopld:
        /* r12 iterations of a plain 64-byte copy, no preloading. */
        vld1.32 {q8, q9}, [r1]!
        vld1.32 {q10, q11}, [r1]!
        subs    r12, r12, #1
        vst1.32 {q8, q9}, [r0]!
        vst1.32 {q10, q11}, [r0]!
        bne     .L_neon_copy_64_loop_nopld
        ands    r2, r2, #0x3f           /* r2 = bytes left over (0..63) */
        beq     .L_neon_exit

.L_neon_copy_32_a:
        movs    r3, r2, lsl #27         /* C = bit 5 (32), N = bit 4 (16) */
        bcc     .L_neon_16
        vld1.32 {q0,q1}, [r1]!
        vst1.32 {q0,q1}, [r0]!

.L_neon_16:
        bpl     .L_neon_lt16            /* N clear: no 16-byte piece */
        vld1.32 {q8}, [r1]!
        vst1.32 {q8}, [r0]!
        ands    r2, r2, #0x0f           /* r2 = bytes left over (0..15) */
        beq     .L_neon_exit

.L_neon_lt16:
        movs    r3, r2, lsl #29         /* C = bit 3 (8), N = bit 2 (4) */
        bcc     1f
        vld1.8  {d0}, [r1]!
        vst1.8  {d0}, [r0]!
1:
        /*
         * bge tests N == V and the movs above does not write V, so this
         * relies on V being clear from an earlier flag-setting instruction;
         * it then skips the 4-byte piece when bit 2 is clear.
         */
        bge     .L_neon_lt4
        vld4.8  {d0[0], d1[0], d2[0], d3[0]}, [r1]!
        vst4.8  {d0[0], d1[0], d2[0], d3[0]}, [r0]!

.L_neon_lt4:
        movs    r2, r2, lsl #31         /* C = bit 1 (2), N = bit 0 (1) */
        itt     cs
        ldrhcs  r3, [r1], #2
        strhcs  r3, [r0], #2
        itt     mi
        ldrbmi  r3, [r1]
        strbmi  r3, [r0]

.L_neon_exit:
        pop     {r0, pc}