/* Copyright (c) 2012, Linaro Limited
   All rights reserved.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are met:
       * Redistributions of source code must retain the above copyright
         notice, this list of conditions and the following disclaimer.
       * Redistributions in binary form must reproduce the above copyright
         notice, this list of conditions and the following disclaimer in the
         documentation and/or other materials provided with the distribution.
       * Neither the name of the Linaro nor the
         names of its contributors may be used to endorse or promote products
         derived from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/* Assumptions:
 *
 * ARMv8-a, AArch64
 */

#include <private/bionic_asm.h>

/* 64-bit constants made of one byte value repeated eight times.
   REP8_01 is loaded into `zeroones' and REP8_7f is used as an ORR mask by
   the per-byte NUL test in strcmp.  REP8_80 is kept from the original
   file; it is not referenced by the code in this file's strcmp.  */
#define REP8_01 0x0101010101010101
#define REP8_7f 0x7f7f7f7f7f7f7f7f
#define REP8_80 0x8080808080808080

/* Parameters and result.  `result' shares x0 with `src1', so the first
   source pointer is dead once the result has been written.  */
#define src1		x0
#define src2		x1
#define result		x0

/* Internal variables.  */
#define data1		x2	/* Dword (or byte) loaded from src1.  */
#define data1w		w2	/* 32-bit view of data1, for byte loads.  */
#define data2		x3	/* Dword (or byte) loaded from src2.  */
#define data2w		w3	/* 32-bit view of data2, for byte loads.  */
#define has_nul		x4	/* Non-zero if data1 contains a NUL byte.  */
#define diff		x5	/* data1 ^ data2.  */
#define syndrome	x6	/* diff | has_nul.  */
#define tmp1		x7
#define tmp2		x8
#define tmp3		x9
#define zeroones	x10	/* Holds REP8_01.  */
#define pos		x11	/* Leading-zero count of the syndrome.  */

	/* Start of performance-critical section -- one 64B cache line.
*/

/* strcmp: compare two NUL-terminated byte strings.
 *
 * In:     src1 (x0), src2 (x1) -- addresses of the two strings.
 * Out:    result (x0) -- zero when the strings are equal, otherwise a
 *         negative or positive value according to the first position at
 *         which they differ, with bytes compared as unsigned values.
 * Uses:   data1, data2, has_nul, diff, syndrome, tmp1-tmp3, zeroones and
 *         pos (x2-x11 per the aliases above) and the condition flags.
 *         src1/src2 are advanced as the strings are read.  The stack is
 *         not touched.
 *
 * Three paths:
 *   - both pointers 8-byte aligned: compare one dword per iteration;
 *   - same non-zero offset within a dword (.Lmutual_align): round both
 *     pointers down, neutralise the leading bytes, then join the dword
 *     loop;
 *   - different offsets (.Lmisaligned8): compare one byte at a time.
 *
 * The instruction order in the dword loop is kept exactly as written;
 * the section is sized to fit one 64-byte cache line.  */
ENTRY(strcmp)
	eor	tmp1, src1, src2	/* Bits in which the pointers differ.  */
	mov	zeroones, #REP8_01
	tst	tmp1, #7		/* Different offset within a dword?  */
	b.ne	.Lmisaligned8
	ands	tmp1, src1, #7		/* tmp1 = shared offset within dword.  */
	b.ne	.Lmutual_align
	/* NUL detection works on the principle that (X - 1) & (~X) & 0x80
	   (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
	   can be done in parallel across the entire word.  */
.Lloop_aligned:
	ldr	data1, [src1], #8
	ldr	data2, [src2], #8
.Lstart_realigned:
	sub	tmp1, data1, zeroones
	orr	tmp2, data1, #REP8_7f
	eor	diff, data1, data2	/* Non-zero if differences found.  */
	bic	has_nul, tmp1, tmp2	/* Non-zero if NUL terminator.  */
	orr	syndrome, diff, has_nul
	cbz	syndrome, .Lloop_aligned
	/* End of performance-critical section -- one 64B cache line.  */

#ifndef	__AARCH64EB__
	/* Little-endian: byte-reverse so that the earliest string byte
	   becomes the most significant one.  */
	rev	syndrome, syndrome
	rev	data1, data1
	/* The MS-non-zero bit of the syndrome marks either the first bit
	   that is different, or the top bit of the first zero byte.
	   Shifting left now will bring the critical information into the
	   top bits.  */
	clz	pos, syndrome
	rev	data2, data2
	lsl	data1, data1, pos
	lsl	data2, data2, pos
	/* But we need to zero-extend (char is unsigned) the value and then
	   perform a signed 32-bit subtraction.  */
	lsr	data1, data1, #56
	sub	result, data1, data2, lsr #56
	ret
#else
	/* For big-endian we cannot use the trick with the syndrome value
	   as carry-propagation can corrupt the upper bits if the trailing
	   bytes in the string contain 0x01.  */
	/* However, if there is no NUL byte in the dword, we can generate
	   the result directly.  We can't just subtract the bytes as the
	   MSB might be significant.  */
	cbnz	has_nul, 1f
	cmp	data1, data2
	cset	result, ne		/* 1 if the dwords differ ...  */
	cneg	result, result, lo	/* ... negated if data1 < data2 (unsigned).  */
	ret
1:
	/* Re-compute the NUL-byte detection, using a byte-reversed value.  */
	rev	tmp3, data1
	sub	tmp1, tmp3, zeroones
	orr	tmp2, tmp3, #REP8_7f
	bic	has_nul, tmp1, tmp2
	rev	has_nul, has_nul
	orr	syndrome, diff, has_nul
	clz	pos, syndrome
	/* The MS-non-zero bit of the syndrome marks either the first bit
	   that is different, or the top bit of the first zero byte.
	   Shifting left now will bring the critical information into the
	   top bits.  */
	lsl	data1, data1, pos
	lsl	data2, data2, pos
	/* But we need to zero-extend (char is unsigned) the value and then
	   perform a signed 32-bit subtraction.  */
	lsr	data1, data1, #56
	sub	result, data1, data2, lsr #56
	ret
#endif

.Lmutual_align:
	/* Sources are mutually aligned, but are not currently at an
	   alignment boundary.  Round down the addresses and then mask off
	   the bytes that precede the start point.  */
	bic	src1, src1, #7
	bic	src2, src2, #7
	lsl	tmp1, tmp1, #3		/* Bytes beyond alignment -> bits.  */
	ldr	data1, [src1], #8
	neg	tmp1, tmp1		/* Bits to alignment -64.  */
	ldr	data2, [src2], #8
	mov	tmp2, #~0
#ifdef __AARCH64EB__
	/* Big-endian.  Early bytes are at MSB.  */
	lsl	tmp2, tmp2, tmp1	/* Shift (tmp1 & 63).  */
#else
	/* Little-endian.  Early bytes are at LSB.  */
	lsr	tmp2, tmp2, tmp1	/* Shift (tmp1 & 63).  */
#endif
	/* Force the bytes before the start point to 0xff in both dwords so
	   that they compare equal and cannot be taken for a NUL.  */
	orr	data1, data1, tmp2
	orr	data2, data2, tmp2
	b	.Lstart_realigned

.Lmisaligned8:
	/* We can do better than this.  */
	ldrb	data1w, [src1], #1
	ldrb	data2w, [src2], #1
	/* Carry is set iff data1w >= 1, i.e. the byte is not NUL.  Only then
	   does CCMP compare the two bytes; for a NUL it forces NZCV to 0,
	   which reads as "not equal" and ends the loop.  */
	cmp	data1w, #1
	ccmp	data1w, data2w, #0, cs	/* NZCV = 0b0000.  */
	b.eq	.Lmisaligned8
	/* LDRB zero-extends, so this is the difference of two unsigned
	   byte values.  */
	sub	result, data1, data2
	ret
END(strcmp)