/*
 * Copyright (c) 2013 ARM Ltd
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the company may not be used to endorse or promote
 *    products derived from this software without specific prior written
 *    permission.
 *
 * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <machine/cpu-features.h>
#include <private/bionic_asm.h>

/*
 * Endianness-dependent helpers.
 *
 *   S2LOMEM / S2HIMEM  Shift mnemonics that move the bytes of a loaded
 *                      word toward lower / higher memory addresses.
 *   S2LOMEMEQ          EQ-conditional form of S2LOMEM.
 *   BYTEn_OFFSET       Bit position, inside a loaded word, of the byte
 *                      found at memory offset n.  Used as the rotate
 *                      amount for UXTB.
 *   MSB / LSB          Mask selecting the byte at the highest / lowest
 *                      memory address of a loaded word.
 *
 * S2LOMEMEQ, MSB, LSB and BYTE0_OFFSET are not referenced by the code
 * in this file.
 */
#ifdef __ARMEB__
#define S2LOMEM lsl
#define S2LOMEMEQ lsleq
#define S2HIMEM lsr
#define MSB 0x000000ff
#define LSB 0xff000000
#define BYTE0_OFFSET 24
#define BYTE1_OFFSET 16
#define BYTE2_OFFSET 8
#define BYTE3_OFFSET 0
#else /* not  __ARMEB__ */
#define S2LOMEM lsr
#define S2LOMEMEQ lsreq
#define S2HIMEM lsl
#define BYTE0_OFFSET 0
#define BYTE1_OFFSET 8
#define BYTE2_OFFSET 16
#define BYTE3_OFFSET 24
#define MSB 0xff000000
#define LSB 0x000000ff
#endif /* not  __ARMEB__ */

/* Unified ARM/Thumb syntax: one source for both instruction sets.  */
.syntax         unified

/* When building for Thumb, emit Thumb code and mark the next symbol
   defined (the function entry below) as a Thumb function.  */
#if defined (__thumb__)
        .thumb
        .thumb_func
#endif

/*
 * strcmp: compare two NUL-terminated strings.
 *
 *   In:   r0 = first string, r1 = second string.
 *   Out:  r0 = 0 when the strings are equal, otherwise non-zero with the
 *         sign of the first differing byte pair compared as unsigned
 *         bytes.  The word-wise paths return exactly 1 or -1; the
 *         byte-wise alignment prologue returns the byte difference.
 *   Uses: r2, r3, ip and the flags as scratch; r4-r7 are spilled to a
 *         16-byte stack frame by the `init` macro and reloaded before
 *         returning.
 */
ENTRY(strcmp)
      /* Use LDRD whenever possible.  */

/* The main thing to look out for when comparing large blocks is that
   the loads do not cross a page boundary when loading past the index
   of the byte with the first difference or the first string-terminator.

   For example, if the strings are identical and the string-terminator
   is at index k, byte by byte comparison will not load beyond address
   s1+k and s2+k; word by word comparison may load up to 3 bytes beyond
   k; double word - up to 7 bytes.  If the load of these bytes crosses
   a page boundary, it might cause a memory fault (if the page is not mapped)
   that would not have happened in byte by byte comparison.

   If an address is (double) word aligned, then a load of a (double) word
   from that address will not cross a page boundary.
   Therefore, the algorithm below considers word and double-word alignment
   of strings separately.  */

/* High-level description of the algorithm.

   * The fast path: if both strings are double-word aligned,
     use LDRD to load two words from each string in every loop iteration.
   * If the strings have the same offset from a word boundary,
     use LDRB to load and compare byte by byte until
     the first string is aligned to a word boundary (at most 3 bytes).
     This is optimized for quick return on short unaligned strings.
   * If the strings have the same offset from a double-word boundary,
     use LDRD to load two words from each string in every loop iteration,
     as in the fast path.
   * If the strings do not have the same offset from a double-word boundary,
     load a word from the second string before the loop to initialize the queue.
     Use LDRD to load two words from each string in every loop iteration.
     Inside the loop, load the second word from the second string only after
     comparing the first word, using the queued value, to guarantee safety
     across page boundaries.
   * If the strings do not have the same offset from a word boundary,
     use LDR and a shift queue.  The order of loads and comparisons matters,
     similarly to the previous case.

   * Use UADD8 and SEL to compare words, and use REV and CLZ to compute
     the return value.
   * The only difference between ARM and Thumb modes is the use of the
     CBZ/CBNZ instructions.
   * The only difference between big and little endian is the use of REV
     in little endian to compute the return value, instead of MOV.
*/

10531dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        .macro m_cbz reg label
10631dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris#ifdef __thumb2__
10731dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        cbz     \reg, \label
10831dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris#else   /* not defined __thumb2__ */
10931dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        cmp     \reg, #0
11031dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        beq     \label
11131dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris#endif /* not defined __thumb2__ */
11231dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        .endm /* m_cbz */
11331dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris
11431dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        .macro m_cbnz reg label
11531dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris#ifdef __thumb2__
11631dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        cbnz    \reg, \label
11731dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris#else   /* not defined __thumb2__ */
11831dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        cmp     \reg, #0
11931dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        bne     \label
12031dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris#endif /* not defined __thumb2__ */
12131dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        .endm /* m_cbnz */
12231dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris
12331dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        .macro  init
12431dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        /* Macro to save temporary registers and prepare magic values.  */
12531dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        subs    sp, sp, #16
126bd7fe1d3c4c8877ac53839169851621249289bd7Christopher Ferris        .cfi_def_cfa_offset 16
12731dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        strd    r4, r5, [sp, #8]
128bd7fe1d3c4c8877ac53839169851621249289bd7Christopher Ferris        .cfi_rel_offset r4, 0
129bd7fe1d3c4c8877ac53839169851621249289bd7Christopher Ferris        .cfi_rel_offset r5, 4
13031dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        strd    r6, r7, [sp]
131bd7fe1d3c4c8877ac53839169851621249289bd7Christopher Ferris        .cfi_rel_offset r6, 8
132bd7fe1d3c4c8877ac53839169851621249289bd7Christopher Ferris        .cfi_rel_offset r7, 12
13331dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        mvn     r6, #0  /* all F */
13431dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        mov     r7, #0  /* all 0 */
13531dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        .endm   /* init */
13631dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris
13731dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        .macro  magic_compare_and_branch w1 w2 label
13831dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        /* Macro to compare registers w1 and w2 and conditionally branch to label.  */
13931dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        cmp     \w1, \w2        /* Are w1 and w2 the same?  */
14031dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        magic_find_zero_bytes \w1
14131dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        it      eq
14231dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        cmpeq   ip, #0          /* Is there a zero byte in w1?  */
14331dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        bne     \label
14431dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        .endm /* magic_compare_and_branch */
14531dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris
14631dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        .macro  magic_find_zero_bytes w1
14731dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        /* Macro to find all-zero bytes in w1, result is in ip.  */
14831dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        uadd8   ip, \w1, r6
14931dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        sel     ip, r7, r6
15031dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        .endm /* magic_find_zero_bytes */
15131dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris
15231dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        .macro  setup_return w1 w2
15331dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris#ifdef __ARMEB__
15431dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        mov     r1, \w1
15531dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        mov     r2, \w2
15631dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris#else /* not  __ARMEB__ */
15731dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        rev     r1, \w1
15831dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        rev     r2, \w2
15931dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris#endif /* not  __ARMEB__ */
16031dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        .endm /* setup_return */
16131dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris
16231dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        pld [r0, #0]
16331dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        pld [r1, #0]
16431dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris
16531dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        /* Are both strings double-word aligned?  */
16631dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        orr     ip, r0, r1
16731dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        tst     ip, #7
168a57c9c084bc686a35f4f494ce23cf2a9bb3d5d00Christopher Ferris        bne     .L_do_align
16931dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris
17031dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        /* Fast path.  */
17131dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        init
17231dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris
173a57c9c084bc686a35f4f494ce23cf2a9bb3d5d00Christopher Ferris.L_doubleword_aligned:
17431dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris
17531dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        /* Get here when the strings to compare are double-word aligned.  */
17631dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        /* Compare two words in every iteration.  */
17731dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        .p2align        2
17831dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris2:
17931dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        pld [r0, #16]
18031dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        pld [r1, #16]
18131dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris
18231dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        /* Load the next double-word from each string.  */
18331dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        ldrd    r2, r3, [r0], #8
18431dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        ldrd    r4, r5, [r1], #8
18531dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris
186a57c9c084bc686a35f4f494ce23cf2a9bb3d5d00Christopher Ferris        magic_compare_and_branch w1=r2, w2=r4, label=.L_return_24
187a57c9c084bc686a35f4f494ce23cf2a9bb3d5d00Christopher Ferris        magic_compare_and_branch w1=r3, w2=r5, label=.L_return_35
18831dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        b       2b
18931dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris
.L_do_align:
        /* The fast path above expanded `init`, so the assembler's CFI
           state at this address still describes a 16-byte frame with
           r4-r7 spilled.  Control only arrives here from the entry
           branch, before sp or r4-r7 have been touched, so put the
           unwind description back to the function-entry state.  */
        .cfi_def_cfa_offset 0
        .cfi_restore r4
        .cfi_restore r5
        .cfi_restore r6
        .cfi_restore r7

        /* Is the first string word-aligned?  */
        ands    ip, r0, #3
        beq     .L_word_aligned_r0

        /* Fast compare byte by byte until the first string is word-aligned.  */
        /* The offset of r0 from a word boundary is in ip. Thus, the number of bytes
        to read until the next word boundary is 4-ip.  */
        bic     r0, r0, #3
        /* Aligned word load: fetches the remaining bytes of the first
           string up to the word boundary in one access.  */
        ldr     r2, [r0], #4
        /* Shift bit 0 of ip into N/Z and bit 1 into C:
             ip == 1 -> Z clear, C clear -> fall through to .L_byte1
             ip == 2 -> Z set            -> .L_byte2
             ip == 3 -> Z clear, C set   -> .L_byte3  */
        lsls    ip, ip, #31
        beq     .L_byte2
        bcs     .L_byte3

.L_byte1:
        ldrb    ip, [r1], #1
        uxtb    r3, r2, ror #BYTE1_OFFSET
        subs    ip, r3, ip
        bne     .L_fast_return
        /* Bytes are equal (ip == 0); stop if that byte was the terminator.  */
        m_cbz   reg=r3, label=.L_fast_return

.L_byte2:
        ldrb    ip, [r1], #1
        uxtb    r3, r2, ror #BYTE2_OFFSET
        subs    ip, r3, ip
        bne     .L_fast_return
        m_cbz   reg=r3, label=.L_fast_return

.L_byte3:
        ldrb    ip, [r1], #1
        uxtb    r3, r2, ror #BYTE3_OFFSET
        subs    ip, r3, ip
        bne     .L_fast_return
        /* Equal and not the terminator: r0 is now word-aligned, continue
           with word-wise comparison.  Otherwise fall through and return.  */
        m_cbnz  reg=r3, label=.L_word_aligned_r0

.L_fast_return:
        /* ip = difference of the last byte pair compared (0 if equal).  */
        mov     r0, ip
        bx      lr

.L_word_aligned_r0:
        /* Spill r4-r7 and load the magic constants into r6/r7.  */
        init
        /* The first string is word-aligned.  */
        /* Is the second string word-aligned?  */
        ands    ip, r1, #3
        bne     .L_strcmp_unaligned

.L_word_aligned:
        /* The strings are word-aligned. */
        /* Is the first string double-word aligned?  */
        tst     r0, #4
        beq     .L_doubleword_aligned_r0

        /* If r0 is not double-word aligned yet, align it by loading
        and comparing the next word from each string.  */
        ldr     r2, [r0], #4
        ldr     r4, [r1], #4
        magic_compare_and_branch w1=r2 w2=r4 label=.L_return_24

.L_doubleword_aligned_r0:
        /* Get here when r0 is double-word aligned.  */
        /* Is r1 doubleword_aligned?  */
        tst     r1, #4
        beq     .L_doubleword_aligned

        /* Get here when the strings to compare are word-aligned,
        r0 is double-word aligned, but r1 is not double-word aligned.  */

        /* Initialize the queue.  */
        /* r5 carries one word of the second string between iterations;
           after this load r1 is double-word aligned too.  */
        ldr     r5, [r1], #4

        /* Compare two words in every iteration.  */
        .p2align        2
3:
        pld [r0, #16]
        pld [r1, #16]

        /* Load the next double-word from each string and compare.  */
        ldrd    r2, r3, [r0], #8
        /* Compare against the queued word before touching the next
           double-word of the second string.  */
        magic_compare_and_branch w1=r2 w2=r5 label=.L_return_25
        ldrd    r4, r5, [r1], #8
        magic_compare_and_branch w1=r3 w2=r4 label=.L_return_34
        b       3b

27331dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        .macro miscmp_word offsetlo offsethi
27431dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        /* Macro to compare misaligned strings.  */
27531dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        /* r0, r1 are word-aligned, and at least one of the strings
27631dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        is not double-word aligned.  */
27731dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        /* Compare one word in every loop iteration.  */
27831dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        /* OFFSETLO is the original bit-offset of r1 from a word-boundary,
27931dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        OFFSETHI is 32 - OFFSETLO (i.e., offset from the next word).  */
28031dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris
28131dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        /* Initialize the shift queue.  */
28231dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        ldr     r5, [r1], #4
28331dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris
28431dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        /* Compare one word from each string in every loop iteration.  */
28531dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        .p2align        2
28631dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris7:
28731dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        ldr     r3, [r0], #4
28831dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        S2LOMEM r5, r5, #\offsetlo
28931dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        magic_find_zero_bytes w1=r3
29031dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        cmp     r7, ip, S2HIMEM #\offsetlo
29131dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        and     r2, r3, r6, S2LOMEM #\offsetlo
29231dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        it      eq
29331dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        cmpeq   r2, r5
294a57c9c084bc686a35f4f494ce23cf2a9bb3d5d00Christopher Ferris        bne     .L_return_25
29531dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        ldr     r5, [r1], #4
29631dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        cmp     ip, #0
29731dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        eor r3, r2, r3
29831dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        S2HIMEM r2, r5, #\offsethi
29931dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        it      eq
30031dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        cmpeq   r3, r2
301a57c9c084bc686a35f4f494ce23cf2a9bb3d5d00Christopher Ferris        bne     .L_return_32
30231dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        b       7b
30331dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        .endm /* miscmp_word */
30431dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris
.L_strcmp_unaligned:
        /* r0 is word-aligned, r1 is at offset ip from a word.  */
        /* Align r1 to the (previous) word-boundary.  */
        bic     r1, r1, #3

        /* Unaligned comparison word by word using LDRs. */
        /* ip is 1, 2 or 3 here; pick the loop specialised for that
           byte offset.  Each expansion loops until it branches to one
           of the return stubs, so control never falls through from one
           expansion into the next.  */
        cmp     ip, #2
        beq     .L_miscmp_word_16                 /* If ip == 2.  */
        bge     .L_miscmp_word_24                 /* If ip == 3.  */
        miscmp_word offsetlo=8 offsethi=24        /* If ip == 1.  */
.L_miscmp_word_16:  miscmp_word offsetlo=16 offsethi=16
.L_miscmp_word_24:  miscmp_word offsetlo=24 offsethi=8


319a57c9c084bc686a35f4f494ce23cf2a9bb3d5d00Christopher Ferris.L_return_32:
32031dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        setup_return w1=r3, w2=r2
321a57c9c084bc686a35f4f494ce23cf2a9bb3d5d00Christopher Ferris        b       .L_do_return
322a57c9c084bc686a35f4f494ce23cf2a9bb3d5d00Christopher Ferris.L_return_34:
32331dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        setup_return w1=r3, w2=r4
324a57c9c084bc686a35f4f494ce23cf2a9bb3d5d00Christopher Ferris        b       .L_do_return
325a57c9c084bc686a35f4f494ce23cf2a9bb3d5d00Christopher Ferris.L_return_25:
32631dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        setup_return w1=r2, w2=r5
327a57c9c084bc686a35f4f494ce23cf2a9bb3d5d00Christopher Ferris        b       .L_do_return
328a57c9c084bc686a35f4f494ce23cf2a9bb3d5d00Christopher Ferris.L_return_35:
32931dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        setup_return w1=r3, w2=r5
330a57c9c084bc686a35f4f494ce23cf2a9bb3d5d00Christopher Ferris        b       .L_do_return
331a57c9c084bc686a35f4f494ce23cf2a9bb3d5d00Christopher Ferris.L_return_24:
33231dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        setup_return w1=r2, w2=r4
33331dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris
334a57c9c084bc686a35f4f494ce23cf2a9bb3d5d00Christopher Ferris.L_do_return:
33531dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris
33631dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris#ifdef __ARMEB__
33731dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        mov     r0, ip
33831dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris#else /* not  __ARMEB__ */
33931dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        rev     r0, ip
34031dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris#endif /* not  __ARMEB__ */
34131dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris
34231dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        /* Restore temporaries early, before computing the return value.  */
34331dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        ldrd    r6, r7, [sp]
34431dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        ldrd    r4, r5, [sp, #8]
34531dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        adds    sp, sp, #16
346bd7fe1d3c4c8877ac53839169851621249289bd7Christopher Ferris        .cfi_def_cfa_offset 0
347bd7fe1d3c4c8877ac53839169851621249289bd7Christopher Ferris        .cfi_restore r4
348bd7fe1d3c4c8877ac53839169851621249289bd7Christopher Ferris        .cfi_restore r5
349bd7fe1d3c4c8877ac53839169851621249289bd7Christopher Ferris        .cfi_restore r6
350bd7fe1d3c4c8877ac53839169851621249289bd7Christopher Ferris        .cfi_restore r7
35131dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris
35231dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        /* There is a zero or a different byte between r1 and r2.  */
35331dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        /* r0 contains a mask of all-zero bytes in r1.  */
35431dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        /* Using r0 and not ip here because cbz requires low register.  */
355a57c9c084bc686a35f4f494ce23cf2a9bb3d5d00Christopher Ferris        m_cbz   reg=r0, label=.L_compute_return_value
35631dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        clz     r0, r0
35731dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        /* r0 contains the number of bits on the left of the first all-zero byte in r1.  */
35831dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        rsb     r0, r0, #24
35931dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        /* Here, r0 contains the number of bits on the right of the first all-zero byte in r1.  */
36031dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        lsr     r1, r1, r0
36131dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        lsr     r2, r2, r0
36231dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris
363a57c9c084bc686a35f4f494ce23cf2a9bb3d5d00Christopher Ferris.L_compute_return_value:
36431dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        movs    r0, #1
36531dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        cmp     r1, r2
36631dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        /* The return value is computed as follows.
36731dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        If r1>r2 then (C==1 and Z==0) and LS doesn't hold and r0 is #1 at return.
36831dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        If r1<r2 then (C==0 and Z==0) and we execute SBC with carry_in=0,
36931dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        which means r0:=r0-r0-1 and r0 is #-1 at return.
37031dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        If r1=r2 then (C==1 and Z==1) and we execute SBC with carry_in=1,
37131dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        which means r0:=r0-r0 and r0 is #0 at return.
37231dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        (C==0 and Z==1) cannot happen because the carry bit is "not borrow".  */
37331dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        it      ls
37431dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        sbcls   r0, r0, r0
37531dea25b8b6438df709f6b2c703cf385a2691e41Christopher Ferris        bx      lr
37631dea25b8b6438df709f6b2c703cf385a2691e41Christopher FerrisEND(strcmp)
377