memcpy.S revision ee223d02d96815c989b62043ff1237b1cd4e14b0
/*
 * Copyright (C) 2008 The Android Open Source Project
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
*/

#include <machine/cpu-features.h>

#if __ARM_ARCH__ == 7 || defined(__ARM_NEON__)

        .text
        .fpu    neon

        .global memcpy
        .type   memcpy, %function
        .align  4

/* a prefetch distance of 32*4 works best experimentally */
#define PREFETCH_DISTANCE   (32*4)

/*
 * memcpy, NEON variant (ARM mode, pre-UAL "divided" mnemonics).
 *
 * In:     r0 = destination, r1 = source, r2 = byte count
 * Out:    r0 = original destination (saved on entry, reloaded on exit)
 * Uses:   r1, r2, r3, ip, lr (lr is used as byte scratch, hence saved),
 *         d0-d7, d16-d23, condition flags
 * Stack:  8 bytes ({r0, lr})
 *
 * Outline:
 *   1. For counts >= 16, copy 0..15 head bytes so the destination becomes
 *      16-byte aligned (required by the ":128" store hints below).
 *   2. Copy 128 bytes per iteration while prefetching PREFETCH_DISTANCE
 *      ahead of the source pointer.
 *   3. Copy 32 bytes per iteration, then an optional 16-byte chunk.
 *   4. Copy the last 0..15 bytes; this tail is also the direct path for
 *      counts < 16, so its stores carry no alignment hints.
 *
 * Flag idiom used for the head/tail copies:
 *   movs ip, rN, lsl #31  ->  N = bit 0 of rN, C = bit 1 of rN
 *   movs ip, rN, lsl #29  ->  N = bit 2 of rN, C = bit 3 of rN
 * MOVS leaves V untouched and GE means N == V, so "bge" below acts as
 * "bit 2 clear" as long as V is still clear from the preceding
 * compare/subtract (expected for any realistic length).
 */
memcpy:
        .fnstart
        .save       {r0, lr}
        stmfd       sp!, {r0, lr}

        /* start preloading as early as possible */
        pld         [r1, #0]
        pld         [r1, #32]

        /* do we have at least 16-bytes to copy (needed for alignment below) */
        cmp         r2, #16
        blo         5f

        /* align destination to half cache-line for the write-buffer */
        rsb         r3, r0, #0
        ands        r3, r3, #0xF            /* r3 = (-dst) & 15 = bytes to alignment */
        beq         0f

        /* copy up to 15-bytes (count in r3) */
        sub         r2, r2, r3
        movs        ip, r3, lsl #31         /* N = bit 0, C = bit 1 */
        ldrmib      lr, [r1], #1            /* 1 byte if bit 0 set */
        strmib      lr, [r0], #1
        ldrcsb      ip, [r1], #1            /* 2 bytes if bit 1 set */
        ldrcsb      lr, [r1], #1
        strcsb      ip, [r0], #1
        strcsb      lr, [r0], #1
        movs        ip, r3, lsl #29         /* N = bit 2, C = bit 3 */
        bge         1f
        // copies 4 bytes, destination 32-bits aligned
        vld4.8      {d0[0], d1[0], d2[0], d3[0]}, [r1]!
        vst4.8      {d0[0], d1[0], d2[0], d3[0]}, [r0, :32]!
1:      bcc         2f                      /* C survives: NEON ld/st leave flags alone */
        // copies 8 bytes, destination 64-bits aligned
        vld1.8      {d0}, [r1]!
        vst1.8      {d0}, [r0, :64]!
2:

0:      /* preload immediately the next cache line, which we may need */
        pld         [r1, #(32*0)]
        pld         [r1, #(32*1)]
        pld         [r1, #(32*2)]
        pld         [r1, #(32*3)]

        /* make sure we have at least 128 bytes to copy */
        subs        r2, r2, #128            /* r2 stays biased by -128 inside the loop */
        blo         2f

        /* preload all the cache lines we need.
         * NOTE: the number of pld below depends on PREFETCH_DISTANCE,
         * ideally we would increase the distance in the main loop to
         * avoid the goofy code below. In practice this doesn't seem to make
         * a big difference.
         */
        pld         [r1, #(PREFETCH_DISTANCE + 32*0)]
        pld         [r1, #(PREFETCH_DISTANCE + 32*1)]
        pld         [r1, #(PREFETCH_DISTANCE + 32*2)]
        pld         [r1, #(PREFETCH_DISTANCE + 32*3)]

1:      /* The main loop copies 128 bytes at a time */
        vld1.8      {d0  - d3},   [r1]!
        vld1.8      {d4  - d7},   [r1]!
        vld1.8      {d16 - d19},  [r1]!
        vld1.8      {d20 - d23},  [r1]!
        pld         [r1, #(PREFETCH_DISTANCE + 32*0)]
        pld         [r1, #(PREFETCH_DISTANCE + 32*1)]
        pld         [r1, #(PREFETCH_DISTANCE + 32*2)]
        pld         [r1, #(PREFETCH_DISTANCE + 32*3)]
        subs        r2, r2, #128
        vst1.8      {d0  - d3},   [r0, :128]!
        vst1.8      {d4  - d7},   [r0, :128]!
        vst1.8      {d16 - d19},  [r0, :128]!
        vst1.8      {d20 - d23},  [r0, :128]!
        bhs         1b                      /* flags from the subs above */

2:      /* fix-up the remaining count and make sure we have >= 32 bytes left */
        add         r2, r2, #128
        subs        r2, r2, #32             /* r2 now biased by -32 */
        blo         4f

3:      /* 32 bytes at a time. These cache lines were already preloaded */
        vld1.8      {d0 - d3},  [r1]!
        subs        r2, r2, #32
        vst1.8      {d0 - d3},  [r0, :128]!
        bhs         3b

4:      /* less than 32 left */
        add         r2, r2, #32
        tst         r2, #0x10
        beq         5f
        // copies 16 bytes, 128-bits aligned
        vld1.8      {d0, d1}, [r1]!
        vst1.8      {d0, d1}, [r0, :128]!

5:      /* copy up to 15-bytes (count in r2) */
        movs        ip, r2, lsl #29         /* N = bit 2, C = bit 3 */
        bcc         1f
        vld1.8      {d0}, [r1]!             /* 8 bytes; no alignment hint on this path */
        vst1.8      {d0}, [r0]!
1:      bge         2f
        vld4.8      {d0[0], d1[0], d2[0], d3[0]}, [r1]!    /* 4 bytes */
        vst4.8      {d0[0], d1[0], d2[0], d3[0]}, [r0]!
2:      movs        ip, r2, lsl #31         /* N = bit 0, C = bit 1 */
        ldrmib      r3, [r1], #1
        ldrcsb      ip, [r1], #1
        ldrcsb      lr, [r1], #1
        strmib      r3, [r0], #1
        strcsb      ip, [r0], #1
        strcsb      lr, [r0], #1

        /* reload the original destination as the return value */
        ldmfd       sp!, {r0, lr}
        bx          lr
        .fnend
        /* record the symbol size so ELF tools can attribute addresses */
        .size       memcpy, .-memcpy


#else   /* __ARM_ARCH__ < 7 */


        .text

        .global memcpy
        .type   memcpy, %function
        .align  4

Project 1671dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project /* 1681dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * Optimized memcpy() for ARM. 1691dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * 1701dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * note that memcpy() always returns the destination pointer, 1711dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * so we have to preserve R0. 1721dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project */ 173ee223d02d96815c989b62043ff1237b1cd4e14b0Mathias Agopian 174ee223d02d96815c989b62043ff1237b1cd4e14b0Mathias Agopianmemcpy: 175ee223d02d96815c989b62043ff1237b1cd4e14b0Mathias Agopian /* The stack must always be 64-bits aligned to be compliant with the 1761dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * ARM ABI. Since we have to save R0, we might as well save R4 1771dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * which we can use for better pipelining of the reads below 1781dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project */ 1791dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project .fnstart 1801dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project .save {r0, r4, lr} 1811dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project stmfd sp!, {r0, r4, lr} 1821dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project /* Making room for r5-r11 which will be spilled later */ 1831dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project .pad #28 1841dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project sub sp, sp, #28 1851dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 1861dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project // preload the destination because we'll align it to a cache line 
1871dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project // with small writes. Also start the source "pump". 1881dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project PLD (r0, #0) 1891dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project PLD (r1, #0) 1901dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project PLD (r1, #32) 1911dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 1921dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project /* it simplifies things to take care of len<4 early */ 1931dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project cmp r2, #4 1941dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project blo copy_last_3_and_return 1951dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 1961dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project /* compute the offset to align the source 1971dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * offset = (4-(src&3))&3 = -src & 3 1981dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project */ 1991dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project rsb r3, r1, #0 2001dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project ands r3, r3, #3 2011dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project beq src_aligned 2021dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 2031dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project /* align source to 32 bits. We need to insert 2 instructions between 2041dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * a ldr[b|h] and str[b|h] because byte and half-word instructions 2051dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * stall 2 cycles. 
2061dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project */ 2071dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project movs r12, r3, lsl #31 2081dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project sub r2, r2, r3 /* we know that r3 <= r2 because r2 >= 4 */ 2091dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project ldrmib r3, [r1], #1 2101dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project ldrcsb r4, [r1], #1 2111dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project ldrcsb r12,[r1], #1 2121dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project strmib r3, [r0], #1 2131dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project strcsb r4, [r0], #1 2141dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project strcsb r12,[r0], #1 215ee223d02d96815c989b62043ff1237b1cd4e14b0Mathias Agopian 2161dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Projectsrc_aligned: 2171dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 218ee223d02d96815c989b62043ff1237b1cd4e14b0Mathias Agopian /* see if src and dst are aligned together (congruent) */ 2191dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project eor r12, r0, r1 2201dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project tst r12, #3 2211dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project bne non_congruent 2221dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 2231dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project /* Use post-incriment mode for stm to spill r5-r11 to reserved stack 2241dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * frame. Don't update sp. 
2251dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project */ 2261dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project stmea sp, {r5-r11} 2271dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 2281dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project /* align the destination to a cache-line */ 2291dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project rsb r3, r0, #0 2301dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project ands r3, r3, #0x1C 2311dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project beq congruent_aligned32 2321dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project cmp r3, r2 2331dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project andhi r3, r2, #0x1C 2341dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 2351dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project /* conditionnaly copies 0 to 7 words (length in r3) */ 236ee223d02d96815c989b62043ff1237b1cd4e14b0Mathias Agopian movs r12, r3, lsl #28 2371dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project ldmcsia r1!, {r4, r5, r6, r7} /* 16 bytes */ 2381dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project ldmmiia r1!, {r8, r9} /* 8 bytes */ 2391dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project stmcsia r0!, {r4, r5, r6, r7} 2401dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project stmmiia r0!, {r8, r9} 2411dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project tst r3, #0x4 2421dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project ldrne r10,[r1], #4 /* 4 bytes */ 2431dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project strne r10,[r0], #4 2441dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project sub r2, r2, r3 2451dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source 
Project 2461dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Projectcongruent_aligned32: 2471dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project /* 2481dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * here source is aligned to 32 bytes. 2491dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project */ 2501dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 2511dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Projectcached_aligned32: 2521dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project subs r2, r2, #32 2531dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project blo less_than_32_left 2541dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 2551dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project /* 2561dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * We preload a cache-line up to 64 bytes ahead. On the 926, this will 257ee223d02d96815c989b62043ff1237b1cd4e14b0Mathias Agopian * stall only until the requested world is fetched, but the linefill 2581dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * continues in the the background. 2591dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * While the linefill is going, we write our previous cache-line 2601dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * into the write-buffer (which should have some free space). 
2611dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * When the linefill is done, the writebuffer will 2621dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * start dumping its content into memory 2631dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * 2641dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * While all this is going, we then load a full cache line into 2651dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * 8 registers, this cache line should be in the cache by now 2661dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * (or partly in the cache). 2671dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * 2681dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * This code should work well regardless of the source/dest alignment. 2691dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * 2701dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project */ 2711dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 2721dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project // Align the preload register to a cache-line because the cpu does 2731dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project // "critical word first" (the first word requested is loaded first). 
2741dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project bic r12, r1, #0x1F 2751dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project add r12, r12, #64 2761dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 2771dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project1: ldmia r1!, { r4-r11 } 2781dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project PLD (r12, #64) 2791dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project subs r2, r2, #32 2801dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 2811dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project // NOTE: if r12 is more than 64 ahead of r1, the following ldrhi 2821dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project // for ARM9 preload will not be safely guarded by the preceding subs. 283ee223d02d96815c989b62043ff1237b1cd4e14b0Mathias Agopian // When it is safely guarded the only possibility to have SIGSEGV here 2841dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project // is because the caller overstates the length. 
2851dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project ldrhi r3, [r12], #32 /* cheap ARM9 preload */ 2861dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project stmia r0!, { r4-r11 } 2871dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project bhs 1b 288ee223d02d96815c989b62043ff1237b1cd4e14b0Mathias Agopian 2891dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project add r2, r2, #32 2901dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 2911dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 2921dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 2931dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 2941dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Projectless_than_32_left: 295ee223d02d96815c989b62043ff1237b1cd4e14b0Mathias Agopian /* 2961dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * less than 32 bytes left at this point (length in r2) 2971dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project */ 2981dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 2991dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project /* skip all this if there is nothing to do, which should 3001dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * be a common case (if not executed the code below takes 3011dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * about 16 cycles) 3021dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project */ 3031dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project tst r2, #0x1F 3041dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project beq 1f 3051dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 3061dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project /* conditionnaly copies 0 to 31 bytes */ 
307ee223d02d96815c989b62043ff1237b1cd4e14b0Mathias Agopian movs r12, r2, lsl #28 3081dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project ldmcsia r1!, {r4, r5, r6, r7} /* 16 bytes */ 3091dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project ldmmiia r1!, {r8, r9} /* 8 bytes */ 3101dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project stmcsia r0!, {r4, r5, r6, r7} 3111dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project stmmiia r0!, {r8, r9} 3121dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project movs r12, r2, lsl #30 3131dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project ldrcs r3, [r1], #4 /* 4 bytes */ 3141dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project ldrmih r4, [r1], #2 /* 2 bytes */ 315ee223d02d96815c989b62043ff1237b1cd4e14b0Mathias Agopian strcs r3, [r0], #4 3161dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project strmih r4, [r0], #2 3171dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project tst r2, #0x1 3181dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project ldrneb r3, [r1] /* last byte */ 3191dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project strneb r3, [r0] 3201dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 3211dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project /* we're done! 
restore everything and return */ 3221dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project1: ldmfd sp!, {r5-r11} 3231dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project ldmfd sp!, {r0, r4, lr} 3241dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project bx lr 3251dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 3261dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project /********************************************************************/ 3271dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 3281dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Projectnon_congruent: 3291dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project /* 3301dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * here source is aligned to 4 bytes 3311dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * but destination is not. 3321dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * 333ee223d02d96815c989b62043ff1237b1cd4e14b0Mathias Agopian * in the code below r2 is the number of bytes read 3341dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * (the number of bytes written is always smaller, because we have 3351dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * partial words in the shift queue) 3361dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project */ 3371dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project cmp r2, #4 3381dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project blo copy_last_3_and_return 339ee223d02d96815c989b62043ff1237b1cd4e14b0Mathias Agopian 3401dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project /* Use post-incriment mode for stm to spill r5-r11 to reserved stack 3411dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * frame. Don't update sp. 
3421dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project */ 3431dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project stmea sp, {r5-r11} 344ee223d02d96815c989b62043ff1237b1cd4e14b0Mathias Agopian 3451dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project /* compute shifts needed to align src to dest */ 3461dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project rsb r5, r0, #0 3471dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project and r5, r5, #3 /* r5 = # bytes in partial words */ 348ee223d02d96815c989b62043ff1237b1cd4e14b0Mathias Agopian mov r12, r5, lsl #3 /* r12 = right */ 3491dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project rsb lr, r12, #32 /* lr = left */ 350ee223d02d96815c989b62043ff1237b1cd4e14b0Mathias Agopian 3511dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project /* read the first word */ 3521dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project ldr r3, [r1], #4 3531dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project sub r2, r2, #4 354ee223d02d96815c989b62043ff1237b1cd4e14b0Mathias Agopian 3551dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project /* write a partial word (0 to 3 bytes), such that destination 3561dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * becomes aligned to 32 bits (r5 = nb of words to copy for alignment) 3571dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project */ 3581dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project movs r5, r5, lsl #31 3591dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project strmib r3, [r0], #1 360ee223d02d96815c989b62043ff1237b1cd4e14b0Mathias Agopian movmi r3, r3, lsr #8 3611dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project strcsb r3, [r0], #1 3621dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project movcs r3, r3, lsr #8 
3631dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project strcsb r3, [r0], #1 3641dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project movcs r3, r3, lsr #8 3651dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 3661dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project cmp r2, #4 3671dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project blo partial_word_tail 368ee223d02d96815c989b62043ff1237b1cd4e14b0Mathias Agopian 3691dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project /* Align destination to 32 bytes (cache line boundary) */ 3701dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project1: tst r0, #0x1c 3711dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project beq 2f 3721dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project ldr r5, [r1], #4 3731dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project sub r2, r2, #4 3741dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project orr r4, r3, r5, lsl lr 3751dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project mov r3, r5, lsr r12 3761dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project str r4, [r0], #4 3771dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project cmp r2, #4 3781dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project bhs 1b 3791dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project blo partial_word_tail 3801dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 3811dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project /* copy 32 bytes at a time */ 3821dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project2: subs r2, r2, #32 3831dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project blo less_than_thirtytwo 3841dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 
3851dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project /* Use immediate mode for the shifts, because there is an extra cycle 3861dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * for register shifts, which could account for up to 50% of 3871dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project * performance hit. 3881dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project */ 3891dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 3901dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project cmp r12, #24 3911dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project beq loop24 3921dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project cmp r12, #8 3931dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project beq loop8 3941dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 3951dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Projectloop16: 3961dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project ldr r12, [r1], #4 3971dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project1: mov r4, r12 3981dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project ldmia r1!, { r5,r6,r7, r8,r9,r10,r11} 3991dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project PLD (r1, #64) 4001dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project subs r2, r2, #32 4011dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project ldrhs r12, [r1], #4 4021dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project orr r3, r3, r4, lsl #16 4031dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project mov r4, r4, lsr #16 4041dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project orr r4, r4, r5, lsl #16 4051dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project mov r5, r5, lsr #16 
4061dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project orr r5, r5, r6, lsl #16 4071dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project mov r6, r6, lsr #16 4081dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project orr r6, r6, r7, lsl #16 4091dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project mov r7, r7, lsr #16 4101dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project orr r7, r7, r8, lsl #16 4111dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project mov r8, r8, lsr #16 4121dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project orr r8, r8, r9, lsl #16 4131dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project mov r9, r9, lsr #16 4141dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project orr r9, r9, r10, lsl #16 4151dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project mov r10, r10, lsr #16 4161dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project orr r10, r10, r11, lsl #16 4171dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10} 4181dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project mov r3, r11, lsr #16 4191dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project bhs 1b 4201dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project b less_than_thirtytwo 4211dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 4221dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Projectloop8: 4231dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project ldr r12, [r1], #4 4241dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project1: mov r4, r12 4251dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project ldmia r1!, { r5,r6,r7, r8,r9,r10,r11} 4261dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project PLD (r1, 
#64) 4271dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project subs r2, r2, #32 4281dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project ldrhs r12, [r1], #4 4291dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project orr r3, r3, r4, lsl #24 4301dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project mov r4, r4, lsr #8 4311dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project orr r4, r4, r5, lsl #24 4321dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project mov r5, r5, lsr #8 4331dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project orr r5, r5, r6, lsl #24 4341dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project mov r6, r6, lsr #8 4351dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project orr r6, r6, r7, lsl #24 4361dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project mov r7, r7, lsr #8 4371dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project orr r7, r7, r8, lsl #24 4381dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project mov r8, r8, lsr #8 4391dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project orr r8, r8, r9, lsl #24 4401dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project mov r9, r9, lsr #8 4411dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project orr r9, r9, r10, lsl #24 4421dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project mov r10, r10, lsr #8 4431dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project orr r10, r10, r11, lsl #24 4441dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10} 4451dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project mov r3, r11, lsr #8 4461dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project bhs 1b 4471dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open 
Source Project b less_than_thirtytwo 4481dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 4491dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Projectloop24: 4501dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project ldr r12, [r1], #4 4511dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project1: mov r4, r12 4521dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project ldmia r1!, { r5,r6,r7, r8,r9,r10,r11} 4531dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project PLD (r1, #64) 4541dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project subs r2, r2, #32 4551dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project ldrhs r12, [r1], #4 4561dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project orr r3, r3, r4, lsl #8 4571dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project mov r4, r4, lsr #24 4581dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project orr r4, r4, r5, lsl #8 4591dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project mov r5, r5, lsr #24 4601dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project orr r5, r5, r6, lsl #8 4611dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project mov r6, r6, lsr #24 4621dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project orr r6, r6, r7, lsl #8 4631dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project mov r7, r7, lsr #24 4641dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project orr r7, r7, r8, lsl #8 4651dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project mov r8, r8, lsr #24 4661dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project orr r8, r8, r9, lsl #8 4671dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project mov r9, r9, lsr #24 4681dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 
orr r9, r9, r10, lsl #8 4691dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project mov r10, r10, lsr #24 4701dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project orr r10, r10, r11, lsl #8 4711dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10} 4721dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project mov r3, r11, lsr #24 4731dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project bhs 1b 4741dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 4751dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 4761dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Projectless_than_thirtytwo: 4771dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project /* copy the last 0 to 31 bytes of the source */ 4781dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project rsb r12, lr, #32 /* we corrupted r12, recompute it */ 4791dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project add r2, r2, #32 4801dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project cmp r2, #4 4811dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project blo partial_word_tail 4821dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 4831dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project1: ldr r5, [r1], #4 4841dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project sub r2, r2, #4 4851dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project orr r4, r3, r5, lsl lr 4861dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project mov r3, r5, lsr r12 4871dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project str r4, [r0], #4 4881dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project cmp r2, #4 4891dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project bhs 1b 
4901dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 4911dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Projectpartial_word_tail: 4921dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project /* we have a partial word in the input buffer */ 4931dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project movs r5, lr, lsl #(31-3) 4941dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project strmib r3, [r0], #1 4951dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project movmi r3, r3, lsr #8 4961dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project strcsb r3, [r0], #1 4971dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project movcs r3, r3, lsr #8 4981dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project strcsb r3, [r0], #1 499ee223d02d96815c989b62043ff1237b1cd4e14b0Mathias Agopian 5001dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project /* Refill spilled registers from the stack. Don't update sp. 
*/ 5011dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project ldmfd sp, {r5-r11} 5021dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 5031dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Projectcopy_last_3_and_return: 5041dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project movs r2, r2, lsl #31 /* copy remaining 0, 1, 2 or 3 bytes */ 5051dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project ldrmib r2, [r1], #1 5061dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project ldrcsb r3, [r1], #1 5071dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project ldrcsb r12,[r1] 5081dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project strmib r2, [r0], #1 5091dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project strcsb r3, [r0], #1 5101dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project strcsb r12,[r0] 5111dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 5121dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project /* we're done! restore sp and spilled registers and return */ 5131dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project add sp, sp, #28 5141dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project ldmfd sp!, {r0, r4, lr} 5151dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project bx lr 5161dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project .fnend 5171dc9e472e19acfe6dc7f41e429236e7eef7ceda1The Android Open Source Project 518ee223d02d96815c989b62043ff1237b1cd4e14b0Mathias Agopian 519ee223d02d96815c989b62043ff1237b1cd4e14b0Mathias Agopian#endif /* __ARM_ARCH__ < 7 */ 520