/*
 * Copyright (c) 2013 ARM Ltd
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the company may not be used to endorse or promote
 *    products derived from this software without specific prior written
 *    permission.
 *
 * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <machine/cpu-features.h>
#include <private/bionic_asm.h>

/* Endian-dependent helpers.
   S2LOMEM / S2HIMEM are the shift mnemonics that move register contents
   towards the byte at the lower / higher memory address.
   BYTEn_OFFSET is the bit position, inside a loaded word, of the byte that
   was at memory offset n.  */
#ifdef __ARMEB__
#define S2LOMEM lsl
#define S2LOMEMEQ lsleq
#define S2HIMEM lsr
#define MSB 0x000000ff
#define LSB 0xff000000
#define BYTE0_OFFSET 24
#define BYTE1_OFFSET 16
#define BYTE2_OFFSET 8
#define BYTE3_OFFSET 0
#else /* not  __ARMEB__ */
#define S2LOMEM lsr
#define S2LOMEMEQ lsreq
#define S2HIMEM lsl
#define BYTE0_OFFSET 0
#define BYTE1_OFFSET 8
#define BYTE2_OFFSET 16
#define BYTE3_OFFSET 24
#define MSB 0xff000000
#define LSB 0x000000ff
#endif /* not  __ARMEB__ */

.syntax         unified

#if defined (__thumb__)
        .thumb
        .thumb_func
#endif

/* strcmp: compare two NUL-terminated strings.

   On entry r0 points to the first string and r1 to the second; the result
   is left in r0.

   Return value:
     * When the outcome is decided while byte-aligning the first string
       (.L_fast_return), r0 is the difference between the two bytes
       (byte of the first string minus byte of the second), or 0 when both
       strings end at the same byte.
     * On every word / double-word path (.L_do_return), r0 is 1, 0 or -1.

   Registers r4-r7 are saved on the stack by the `init' macro and restored
   at .L_do_return; r2, r3 and ip are used as scratch.  */
ENTRY(strcmp)
      /* Use LDRD whenever possible.  */

/* The main thing to look out for when comparing large blocks is that
   the loads do not cross a page boundary when loading past the index
   of the byte with the first difference or the first string-terminator.

   For example, if the strings are identical and the string-terminator
   is at index k, byte by byte comparison will not load beyond address
   s1+k and s2+k; word by word comparison may load up to 3 bytes beyond
   k; double word - up to 7 bytes.  If the load of these bytes crosses
   a page boundary, it might cause a memory fault (if the page is not mapped)
   that would not have happened in byte by byte comparison.

   If an address is (double) word aligned, then a load of a (double) word
   from that address will not cross a page boundary.
   Therefore, the algorithm below considers word and double-word alignment
   of strings separately.  */

/* High-level description of the algorithm.

   * The fast path: if both strings are double-word aligned,
     use LDRD to load two words from each string in every loop iteration.
   * If the strings have the same offset from a word boundary,
     use LDRB to load and compare byte by byte until
     the first string is aligned to a word boundary (at most 3 bytes).
     This is optimized for quick return on short unaligned strings.
   * If the strings have the same offset from a double-word boundary,
     use LDRD to load two words from each string in every loop iteration, as in the fast path.
   * If the strings do not have the same offset from a double-word boundary,
     load a word from the second string before the loop to initialize the queue.
     Use LDRD to load two words from every string in every loop iteration.
     Inside the loop, load the second word from the second string only after comparing
     the first word, using the queued value, to guarantee safety across page boundaries.
   * If the strings do not have the same offset from a word boundary,
     use LDR and a shift queue. Order of loads and comparisons matters,
     similarly to the previous case.

   * Use UADD8 and SEL to compare words, and use REV and CLZ to compute the return value.
   * The only difference between ARM and Thumb modes is the use of CBZ instruction.
   * The only difference between big and little endian is the use of REV in little endian
     to compute the return value, instead of MOV.
*/

        .macro m_cbz reg label
        /* Branch to label when reg is zero.  The non-Thumb-2 variant uses
           CMP and therefore overwrites the condition flags.  */
#ifdef __thumb2__
        cbz     \reg, \label
#else   /* not defined __thumb2__ */
        cmp     \reg, #0
        beq     \label
#endif /* not defined __thumb2__ */
        .endm /* m_cbz */

        .macro m_cbnz reg label
        /* Branch to label when reg is non-zero.  The non-Thumb-2 variant uses
           CMP and therefore overwrites the condition flags.  */
#ifdef __thumb2__
        cbnz    \reg, \label
#else   /* not defined __thumb2__ */
        cmp     \reg, #0
        bne     \label
#endif /* not defined __thumb2__ */
        .endm /* m_cbnz */

        .macro  init
        /* Macro to save temporary registers and prepare magic values.  */
        /* Stack layout after this macro (offsets from the new sp):
             [sp, #0]  r6
             [sp, #4]  r7
             [sp, #8]  r4
             [sp, #12] r5
           .cfi_rel_offset takes an offset from the current CFA register
           (sp), so each annotation below must use the same offset as the
           store it describes.  */
        subs    sp, sp, #16
        .cfi_def_cfa_offset 16
        strd    r4, r5, [sp, #8]
        .cfi_rel_offset r4, 8
        .cfi_rel_offset r5, 12
        strd    r6, r7, [sp]
        .cfi_rel_offset r6, 0
        .cfi_rel_offset r7, 4
        mvn     r6, #0  /* all F */
        mov     r7, #0  /* all 0 */
        .endm   /* init */

        .macro  magic_compare_and_branch w1 w2 label
        /* Macro to compare registers w1 and w2 and conditionally branch to label.  */
        /* The branch is taken when the words differ or when w1 contains a
           zero byte.  ip is left holding the zero-byte mask of w1.  */
        cmp     \w1, \w2        /* Are w1 and w2 the same?  */
        magic_find_zero_bytes \w1
        it      eq
        cmpeq   ip, #0          /* Is there a zero byte in w1?  */
        bne     \label
        .endm /* magic_compare_and_branch */

        .macro  magic_find_zero_bytes w1
        /* Macro to find all-zero bytes in w1, result is in ip.  */
        /* Requires r6 = all ones and r7 = 0 (see init).  Does not modify
           the N, Z, C, V flags, which magic_compare_and_branch relies on.  */
        uadd8   ip, \w1, r6
        sel     ip, r7, r6
        .endm /* magic_find_zero_bytes */

        .macro  setup_return w1 w2
        /* Copy the two words to compare into r1 and r2, byte-reversed on
           little endian (see the high-level description above).  */
#ifdef __ARMEB__
        mov     r1, \w1
        mov     r2, \w2
#else /* not  __ARMEB__ */
        rev     r1, \w1
        rev     r2, \w2
#endif /* not  __ARMEB__ */
        .endm /* setup_return */

        pld [r0, #0]
        pld [r1, #0]

        /* Are both strings double-word aligned?  */
        orr     ip, r0, r1
        tst     ip, #7
        bne     .L_do_align

        /* Fast path.  */
        /* Remember the unwind state of the entry frame (nothing pushed yet)
           so that it can be reinstated at .L_do_align, which is reached
           before any stack adjustment has been made.  */
        .cfi_remember_state
        init

.L_doubleword_aligned:

        /* Get here when the strings to compare are double-word aligned.  */
        /* Compare two words in every iteration.  */
        .p2align        2
2:
        pld [r0, #16]
        pld [r1, #16]

        /* Load the next double-word from each string.  */
        ldrd    r2, r3, [r0], #8
        ldrd    r4, r5, [r1], #8

        magic_compare_and_branch w1=r2, w2=r4, label=.L_return_24
        magic_compare_and_branch w1=r3, w2=r5, label=.L_return_35
        b       2b

.L_do_align:
        /* The code from here up to .L_word_aligned_r0 runs with the
           caller's stack pointer: nothing has been pushed on this path.  */
        .cfi_restore_state

        /* Is the first string word-aligned?  */
        ands    ip, r0, #3
        beq     .L_word_aligned_r0

        /* Fast compare byte by byte until the first string is word-aligned.  */
        /* The offset of r0 from a word boundary is in ip. Thus, the number of bytes
        to read until the next word boundary is 4-ip.  */
        bic     r0, r0, #3
        ldr     r2, [r0], #4
        /* ip is 1, 2 or 3 here.  After the shift by 31:
             ip == 1: Z clear, C clear -> fall through to .L_byte1
             ip == 2: Z set            -> .L_byte2
             ip == 3: Z clear, C set   -> .L_byte3  */
        lsls    ip, ip, #31
        beq     .L_byte2
        bcs     .L_byte3

.L_byte1:
        ldrb    ip, [r1], #1
        uxtb    r3, r2, ror #BYTE1_OFFSET
        subs    ip, r3, ip
        bne     .L_fast_return
        m_cbz   reg=r3, label=.L_fast_return

.L_byte2:
        ldrb    ip, [r1], #1
        uxtb    r3, r2, ror #BYTE2_OFFSET
        subs    ip, r3, ip
        bne     .L_fast_return
        m_cbz   reg=r3, label=.L_fast_return

.L_byte3:
        ldrb    ip, [r1], #1
        uxtb    r3, r2, ror #BYTE3_OFFSET
        subs    ip, r3, ip
        bne     .L_fast_return
        m_cbnz  reg=r3, label=.L_word_aligned_r0

.L_fast_return:
        /* ip holds the difference of the last pair of bytes compared.  */
        mov     r0, ip
        bx      lr

.L_word_aligned_r0:
        init
        /* The first string is word-aligned.  */
        /* Is the second string word-aligned?  */
        ands    ip, r1, #3
        bne     .L_strcmp_unaligned

.L_word_aligned:
        /* The strings are word-aligned. */
        /* Is the first string double-word aligned?  */
        tst     r0, #4
        beq     .L_doubleword_aligned_r0

        /* If r0 is not double-word aligned yet, align it by loading
        and comparing the next word from each string.  */
        ldr     r2, [r0], #4
        ldr     r4, [r1], #4
        magic_compare_and_branch w1=r2 w2=r4 label=.L_return_24

.L_doubleword_aligned_r0:
        /* Get here when r0 is double-word aligned.  */
        /* Is r1 doubleword_aligned?  */
        tst     r1, #4
        beq     .L_doubleword_aligned

        /* Get here when the strings to compare are word-aligned,
        r0 is double-word aligned, but r1 is not double-word aligned.  */

        /* Initialize the queue.  */
        ldr     r5, [r1], #4

        /* Compare two words in every iteration.  */
        .p2align        2
3:
        pld [r0, #16]
        pld [r1, #16]

        /* Load the next double-word from each string and compare.  */
        ldrd    r2, r3, [r0], #8
        magic_compare_and_branch w1=r2 w2=r5 label=.L_return_25
        ldrd    r4, r5, [r1], #8
        magic_compare_and_branch w1=r3 w2=r4 label=.L_return_34
        b       3b

        .macro miscmp_word offsetlo offsethi
        /* Macro to compare misaligned strings.  */
        /* r0, r1 are word-aligned, and at least one of the strings
        is not double-word aligned.  */
        /* Compare one word in every loop iteration.  */
        /* OFFSETLO is the original bit-offset of r1 from a word-boundary,
        OFFSETHI is 32 - OFFSETLO (i.e., offset from the next word).  */

        /* Initialize the shift queue.  */
        ldr     r5, [r1], #4

        /* Compare one word from each string in every loop iteration.  */
        .p2align        2
7:
        ldr     r3, [r0], #4
        S2LOMEM r5, r5, #\offsetlo
        magic_find_zero_bytes w1=r3
        cmp     r7, ip, S2HIMEM #\offsetlo
        and     r2, r3, r6, S2LOMEM #\offsetlo
        it      eq
        cmpeq   r2, r5
        bne     .L_return_25
        /* The next word of the second string is loaded only after the
           check above has passed (see the note on load ordering in the
           high-level description).  */
        ldr     r5, [r1], #4
        cmp     ip, #0
        eor     r3, r2, r3
        S2HIMEM r2, r5, #\offsethi
        it      eq
        cmpeq   r3, r2
        bne     .L_return_32
        b       7b
        .endm /* miscmp_word */

.L_strcmp_unaligned:
        /* r0 is word-aligned, r1 is at offset ip from a word.  */
        /* Align r1 to the (previous) word-boundary.  */
        bic     r1, r1, #3

        /* Unaligned comparison word by word using LDRs. */
        cmp     ip, #2
        beq     .L_miscmp_word_16                 /* If ip == 2.  */
        bge     .L_miscmp_word_24                 /* If ip == 3.  */
        miscmp_word offsetlo=8 offsethi=24        /* If ip == 1.  */
.L_miscmp_word_16:  miscmp_word offsetlo=16 offsethi=16
.L_miscmp_word_24:  miscmp_word offsetlo=24 offsethi=8


        /* Return stubs: .L_return_XY compares rX (from the first string)
           with rY (from the second string).  */
.L_return_32:
        setup_return w1=r3, w2=r2
        b       .L_do_return
.L_return_34:
        setup_return w1=r3, w2=r4
        b       .L_do_return
.L_return_25:
        setup_return w1=r2, w2=r5
        b       .L_do_return
.L_return_35:
        setup_return w1=r3, w2=r5
        b       .L_do_return
.L_return_24:
        setup_return w1=r2, w2=r4

.L_do_return:

#ifdef __ARMEB__
        mov     r0, ip
#else /* not  __ARMEB__ */
        rev     r0, ip
#endif /* not  __ARMEB__ */

        /* Restore temporaries early, before computing the return value.  */
        ldrd    r6, r7, [sp]
        ldrd    r4, r5, [sp, #8]
        adds    sp, sp, #16
        .cfi_def_cfa_offset 0
        .cfi_restore r4
        .cfi_restore r5
        .cfi_restore r6
        .cfi_restore r7

        /* There is a zero or a different byte between r1 and r2.  */
        /* r0 contains a mask of all-zero bytes in r1.  */
        /* Using r0 and not ip here because cbz requires low register.  */
        m_cbz   reg=r0, label=.L_compute_return_value
        clz     r0, r0
        /* r0 contains the number of bits on the left of the first all-zero byte in r1.  */
        rsb     r0, r0, #24
        /* Here, r0 contains the number of bits on the right of the first all-zero byte in r1.  */
        lsr     r1, r1, r0
        lsr     r2, r2, r0

.L_compute_return_value:
        movs    r0, #1
        cmp     r1, r2
        /* The return value is computed as follows.
        If r1>r2 then (C==1 and Z==0) and LS doesn't hold and r0 is #1 at return.
        If r1<r2 then (C==0 and Z==0) and we execute SBC with carry_in=0,
        which means r0:=r0-r0-1 and r0 is #-1 at return.
        If r1=r2 then (C==1 and Z==1) and we execute SBC with carry_in=1,
        which means r0:=r0-r0 and r0 is #0 at return.
        (C==0 and Z==1) cannot happen because the carry bit is "not borrow".  */
        it      ls
        sbcls   r0, r0, r0
        bx      lr
END(strcmp)