/* Copyright (c) 2012, Linaro Limited
   All rights reserved.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are met:
       * Redistributions of source code must retain the above copyright
         notice, this list of conditions and the following disclaimer.
       * Redistributions in binary form must reproduce the above copyright
         notice, this list of conditions and the following disclaimer in the
         documentation and/or other materials provided with the distribution.
       * Neither the name of the Linaro nor the
         names of its contributors may be used to endorse or promote products
         derived from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/* Assumptions:
 *
 * ARMv8-a, AArch64
 */

#include <private/bionic_asm.h>

/* A single byte value replicated into all eight bytes of a dword.  */
#define REP8_01 0x0101010101010101
#define REP8_7f 0x7f7f7f7f7f7f7f7f
#define REP8_80 0x8080808080808080

/* Parameters and result.  */
#define src1		x0
#define src2		x1
#define result		x0

/* Internal variables.  */
#define data1		x2
#define data1w		w2
#define data2		x3
#define data2w		w3
#define has_nul		x4
#define diff		x5
#define syndrome	x6
#define tmp1		x7
#define tmp2		x8
#define tmp3		x9
#define zeroones	x10
#define pos		x11

/* strcmp
 *
 * In:     x0 (src1), x1 (src2) = addresses of the two NUL-terminated
 *         byte strings to compare.
 * Out:    x0 (result) = zero if the strings are equal, negative if the
 *         first string orders before the second, positive otherwise.
 *         Only the sign is meaningful: the fast paths subtract two
 *         8-bit windows that start at the first differing *bit*, and
 *         one big-endian path returns exactly -1 or +1.
 * Clobb:  x2-x11 and the condition flags.  No stack is used and no
 *         calls are made.
 *
 * Three strategies, selected on entry:
 *   - both pointers 8-byte aligned: compare a dword per iteration;
 *   - same misalignment within a dword (.Lmutual_align): round both
 *     pointers down, neutralise the leading bytes, then join the
 *     dword loop;
 *   - different misalignment (.Lmisaligned8): byte-at-a-time loop.
 */

	/* Start of performance-critical section  -- one 64B cache line.  */
ENTRY(strcmp)
	/* NOTE(review): this alignment directive follows the ENTRY label.
	   Whether it emits padding after the entry point depends on the
	   alignment the ENTRY macro already applies -- confirm against
	   <private/bionic_asm.h>.  */
.p2align  6
	eor	tmp1, src1, src2
	mov	zeroones, #REP8_01
	tst	tmp1, #7		/* Low 3 address bits differ?  */
	b.ne	.Lmisaligned8
	ands	tmp1, src1, #7		/* tmp1 = byte offset within dword.  */
	b.ne	.Lmutual_align
	/* NUL detection works on the principle that (X - 1) & (~X) & 0x80
	   (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
	   can be done in parallel across the entire word.  */
.Lloop_aligned:
	ldr	data1, [src1], #8
	ldr	data2, [src2], #8
.Lstart_realigned:
	sub	tmp1, data1, zeroones
	orr	tmp2, data1, #REP8_7f
	eor	diff, data1, data2	/* Non-zero if differences found.  */
	bic	has_nul, tmp1, tmp2	/* Non-zero if NUL terminator.  */
	orr	syndrome, diff, has_nul
	cbz	syndrome, .Lloop_aligned
	/* End of performance-critical section  -- one 64B cache line.  */

#ifndef	__AARCH64EB__
	/* Little-endian: byte-reverse so that the earliest string byte is
	   the most significant one.  */
	rev	syndrome, syndrome
	rev	data1, data1
	/* The MS-non-zero bit of the syndrome marks either the first bit
	   that is different, or the top bit of the first zero byte.
	   Shifting left now will bring the critical information into the
	   top bits.  */
	clz	pos, syndrome
	rev	data2, data2
	lsl	data1, data1, pos
	lsl	data2, data2, pos
	/* But we need to zero-extend (char is unsigned) the value and then
	   perform a signed 32-bit subtraction.  */
	lsr	data1, data1, #56
	sub	result, data1, data2, lsr #56
	ret
#else
	/* For big-endian we cannot use the trick with the syndrome value
	   as carry-propagation can corrupt the upper bits if the trailing
	   bytes in the string contain 0x01.  */
	/* However, if there is no NUL byte in the dword, we can generate
	   the result directly.  We can't just subtract the bytes as the
	   MSB might be significant.  */
	cbnz	has_nul, 1f
	cmp	data1, data2
	cset	result, ne		/* result = 1 (dwords differ here).  */
	cneg	result, result, lo	/* -1 if data1 < data2 (unsigned).  */
	ret
1:
	/* Re-compute the NUL-byte detection, using a byte-reversed value.  */
	rev	tmp3, data1
	sub	tmp1, tmp3, zeroones
	orr	tmp2, tmp3, #REP8_7f
	bic	has_nul, tmp1, tmp2
	rev	has_nul, has_nul	/* Back to memory byte order.  */
	orr	syndrome, diff, has_nul
	clz	pos, syndrome
	/* The MS-non-zero bit of the syndrome marks either the first bit
	   that is different, or the top bit of the first zero byte.
	   Shifting left now will bring the critical information into the
	   top bits.  */
	lsl	data1, data1, pos
	lsl	data2, data2, pos
	/* But we need to zero-extend (char is unsigned) the value and then
	   perform a signed 32-bit subtraction.  */
	lsr	data1, data1, #56
	sub	result, data1, data2, lsr #56
	ret
#endif

.Lmutual_align:
	/* Sources are mutually aligned, but are not currently at an
	   alignment boundary.  Round down the addresses and then mask off
	   the bytes that precede the start point.  The mask forces those
	   bytes to 0xff in both dwords, so they compare equal and cannot
	   be mistaken for a NUL.  */
	bic	src1, src1, #7
	bic	src2, src2, #7
	lsl	tmp1, tmp1, #3		/* Bytes beyond alignment -> bits.  */
	ldr	data1, [src1], #8
	neg	tmp1, tmp1		/* Bits to alignment -64.  */
	ldr	data2, [src2], #8
	mov	tmp2, #~0
#ifdef __AARCH64EB__
	/* Big-endian.  Early bytes are at MSB.  */
	lsl	tmp2, tmp2, tmp1	/* Shift (tmp1 & 63).  */
#else
	/* Little-endian.  Early bytes are at LSB.  */
	lsr	tmp2, tmp2, tmp1	/* Shift (tmp1 & 63).  */
#endif
	orr	data1, data1, tmp2
	orr	data2, data2, tmp2
	b	.Lstart_realigned

.Lmisaligned8:
	/* We can do better than this.  */
	/* Byte-at-a-time loop.  The cmp sets carry iff the byte from src1
	   is non-NUL; only then does ccmp compare the two bytes, otherwise
	   it forces the flags to "not equal" so the loop exits.  */
	ldrb	data1w, [src1], #1
	ldrb	data2w, [src2], #1
	cmp	data1w, #1
	ccmp	data1w, data2w, #0, cs	/* NZCV = 0b0000.  */
	b.eq	.Lmisaligned8
	/* ldrb zero-extended both bytes, so this is an unsigned-char
	   difference.  */
	sub	result, data1, data2
	ret
END(strcmp)