/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* Assumptions:
 *
 * ARMv8-a, AArch64
 */

#ifndef ART_RUNTIME_ARCH_ARM64_MEMCMP16_ARM64_S_
#define ART_RUNTIME_ARCH_ARM64_MEMCMP16_ARM64_S_

#include "asm_support_arm64.S"

/* Parameters and result.  */
#define src1        x0
#define src2        x1
#define limit       x2
#define result      x0

/* Internal variables.  (has_nul and tmp3 are named here but not referenced
   by the code in this file.)  */
#define data1       x3
#define data1w      w3
#define data2       x4
#define data2w      w4
#define has_nul     x5
#define diff        x6
#define endloop     x7
#define tmp1        x8
#define tmp2        x9
#define tmp3        x10
#define limit_wd    x12
#define mask        x13

// WARNING: If you change this code to use x14 and x15, you must also change
//          art_quick_string_compareto, which relies on these temps being unused.

/*
 * __memcmp16 -- compare two buffers of 16-bit units.
 *
 * In:       x0 (src1)   address of the first buffer
 *           x1 (src2)   address of the second buffer
 *           x2 (limit)  number of half-words to compare
 * Out:      x0 (result) 0 if limit is 0 or every half-word matches; otherwise
 *                       (first differing half-word of src1) minus (the
 *                       corresponding half-word of src2), both zero-extended.
 * Clobbers: x1-x4, x6-x9, x12, x13 and the condition flags.
 * Stack:    none; the routine makes no calls.
 *
 * NOTE(review): presumably declared on the C++ side with two pointers to
 * 16-bit data and an element count -- confirm against the caller.
 *
 * Notes:
 *  - When both addresses have the same offset within a double-word, data is
 *    fetched with 8-byte loads from double-word-aligned addresses. Bytes of
 *    the first/last double-word that lie outside the buffers are read but
 *    never influence the result.
 *  - The double-word path assumes little-endian data (earlier bytes at the
 *    LSB end) and locates differences on 16-bit lanes of the aligned
 *    double-word, so it assumes both addresses are at least 2-byte aligned.
 */
ENTRY __memcmp16
  cbz     limit, .Lret0
  lsl     limit, limit, #1            /* Half-words to bytes.  */
  eor     tmp1, src1, src2
  tst     tmp1, #7
  b.ne    .Lmisaligned8               /* Offsets within a Dword differ.  */
  ands    tmp1, src1, #7
  b.ne    .Lmutual_align              /* Same offset, but not on a Dword boundary.  */
  add     limit_wd, limit, #7
  lsr     limit_wd, limit_wd, #3      /* limit_wd = ceil(limit / 8) Dwords to examine.  */
  /* Start of performance-critical section  -- one 64B cache line.  */
.Lloop_aligned:
  ldr     data1, [src1], #8
  ldr     data2, [src2], #8
.Lstart_realigned:
  subs    limit_wd, limit_wd, #1
  eor     diff, data1, data2          /* Non-zero if differences found.  */
  /* endloop = diff while Dwords remain, all-ones once the last Dword has been
     loaded, so a single cbz tests both exit conditions.  */
  csinv   endloop, diff, xzr, ne      /* Last Dword or differences.  */
  cbz     endloop, .Lloop_aligned
  /* End of performance-critical section  -- one 64B cache line.  */

  /* Not reached the limit, must have found a diff.  */
  cbnz    limit_wd, .Lnot_limit

  /* Limit % 8 == 0 => all bytes significant.  */
  ands    limit, limit, #7
  b.eq    .Lnot_limit

  /* Last Dword is only partly inside the buffers: clear the bytes past the
     limit in both operands so they extract as equal below.  */
  lsl     limit, limit, #3            /* Bytes -> bits.  */
  mov     mask, #~0
  lsl     mask, mask, limit
  bic     data1, data1, mask
  bic     data2, data2, mask

.Lnot_limit:

  // Swap the byte order of diff. Exact reverse is not important, as we only need to detect
  // the half-word.
  rev     diff, diff
  // The most significant bit of DIFF marks the least significant bit of change between DATA1/2
  clz     diff, diff
  // Mask off 0xF to have shift amount. Why does ARM64 not have BIC with immediate?!?!
  // The result is 16 * (index of the first differing half-word). If diff was zero, clz
  // yields 64; a register-specified shift uses the amount modulo 64, so lane 0 is
  // selected and the (equal) half-words there give a result of 0.
  bfi     diff, xzr, #0, #4
  // Create a 16b mask
  mov     mask, #0xFFFF
  // Shift to the right half-word.
  lsr     data1, data1, diff
  lsr     data2, data2, diff
  // Mask the lowest half-word.
  and     data1, data1, mask
  and     data2, data2, mask
  // Compute difference.
  sub     result, data1, data2
  ret

.Lmutual_align:
  /* Sources are mutually aligned, but are not currently at an
     alignment boundary.  Round down the addresses and then mask off
     the bytes that precede the start point.  */
  bic     src1, src1, #7
  bic     src2, src2, #7
  add     limit, limit, tmp1          /* Adjust the limit for the extra.  */
  lsl     tmp1, tmp1, #3              /* Bytes beyond alignment -> bits.  */
  ldr     data1, [src1], #8
  neg     tmp1, tmp1                  /* Bits to alignment -64.  */
  ldr     data2, [src2], #8
  mov     tmp2, #~0
  /* Little-endian.  Early bytes are at LSB.  */
  lsr     tmp2, tmp2, tmp1            /* Shift (tmp1 & 63).  */
  add     limit_wd, limit, #7
  /* tmp2 now has ones in the bytes that precede the start point; setting them
     in both operands makes those bytes compare equal.  */
  orr     data1, data1, tmp2
  orr     data2, data2, tmp2
  lsr     limit_wd, limit_wd, #3
  b       .Lstart_realigned

.Lret0:
  mov     result, #0
  ret

  .p2align 6
.Lmisaligned8:
  /* limit = bytes - 1 (odd), so the subs below borrows exactly when the
     half-word just loaded is the last one.  */
  sub     limit, limit, #1
1:
  /* Perhaps we can do better than this.  */
  ldrh    data1w, [src1], #2
  ldrh    data2w, [src2], #2
  subs    limit, limit, #2
  /* More half-words remain (C set): compare the pair. Otherwise force
     "not equal" so that the loop terminates.  */
  ccmp    data1w, data2w, #0, cs      /* NZCV = 0b0000.  */
  b.eq    1b
  /* ldrh zero-extends, so this is the difference of the last pair loaded
     (0 if the limit was reached without a mismatch).  */
  sub     result, data1, data2
  ret
END __memcmp16

#endif  // ART_RUNTIME_ARCH_ARM64_MEMCMP16_ARM64_S_