/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* Assumptions:
 *
 * ARMv8-a, AArch64
 */

#ifndef ART_RUNTIME_ARCH_ARM64_MEMCMP16_ARM64_S_
#define ART_RUNTIME_ARCH_ARM64_MEMCMP16_ARM64_S_

#include "asm_support_arm64.S"

/* Parameters and result.  */
#define src1        x0
#define src2        x1
#define limit       x2
#define result      x0  /* Same register as src1.  */

/* Internal variables.  */
#define data1       x3
#define data1w      w3  /* 32-bit view of data1.  */
#define data2       x4
#define data2w      w4  /* 32-bit view of data2.  */
#define has_nul     x5  /* Not referenced by __memcmp16.  */
#define diff        x6
#define endloop     x7
#define tmp1        x8
#define tmp2        x9
#define tmp3        x10 /* Not referenced by __memcmp16.  */
#define limit_wd    x12
#define mask        x13

// WARNING: If you change this code to use x14 or x15, you must also change
//          art_quick_string_compareto, which relies on these temps being unused.

/*
 * __memcmp16: compare two arrays of 16-bit units.
 *
 * In:   x0 (src1)   first array
 *       x1 (src2)   second array
 *       x2 (limit)  number of half-words to compare
 * Out:  x0 (result) 0 if limit is zero or no difference was found within limit;
 *                   otherwise (half-word from src1) - (half-word from src2) at the
 *                   first differing position, each zero-extended from 16 bits.
 * Uses: x3, x4, x6-x9, x12, x13 and the condition flags. src1, src2 and limit
 *       are modified. x14 and x15 are not touched.
 *
 * Paths:
 *  - src1 and src2 agree in their low three address bits: compare 8 bytes at a
 *    time (.Lloop_aligned), first rounding both pointers down if they are not
 *    yet 8-byte aligned (.Lmutual_align).
 *  - otherwise: compare one half-word at a time (.Lmisaligned8).
 *
 * The 8-byte paths load whole aligned double-words, so bytes in front of the
 * start (mutual-align case) or behind the last half-word may be read; they are
 * forced equal or masked off before they can affect the result.
 * The half-word selection below is written for little-endian data.
 * NOTE(review): the tail mask presumably relies on src1/src2 being 2-byte
 * aligned so that the byte limit stays even -- confirm against callers.
 */
ENTRY __memcmp16
  cbz     limit, .Lret0     /* Nothing to compare.  */
  lsl     limit, limit, #1  /* Half-words to bytes.  */
  eor     tmp1, src1, src2
  tst     tmp1, #7          /* Do the low three address bits agree?  */
  b.ne    .Lmisaligned8     /* No: use the half-word loop.  */
  ands    tmp1, src1, #7    /* tmp1 = byte offset of src1 within its Dword.  */
  b.ne    .Lmutual_align
  add     limit_wd, limit, #7
  lsr     limit_wd, limit_wd, #3  /* Number of Dwords, rounded up.  */
  /* Start of performance-critical section  -- one 64B cache line.  */
.Lloop_aligned:
  ldr     data1, [src1], #8
  ldr     data2, [src2], #8
.Lstart_realigned:
  subs    limit_wd, limit_wd, #1  /* Z set when this is the last Dword.  */
  eor     diff, data1, data2  /* Non-zero if differences found.  */
  /* endloop = diff while Dwords remain, all-ones on the last Dword.  */
  csinv   endloop, diff, xzr, ne  /* Last Dword or differences.  */
  cbz     endloop, .Lloop_aligned
  /* End of performance-critical section  -- one 64B cache line.  */

  /* Not reached the limit, must have found a diff.  */
  cbnz    limit_wd, .Lnot_limit

  /* Limit % 8 == 0 => all bytes significant.  */
  ands    limit, limit, #7
  b.eq    .Lnot_limit

  /* Clear the bytes of the last Dword that lie beyond the limit.  diff is
     not recomputed: if it points past the limit, both masked half-words are
     zero and the result is 0.  */
  lsl     limit, limit, #3  /* Bytes -> bits.  */
  mov     mask, #~0
  lsl     mask, mask, limit
  bic     data1, data1, mask
  bic     data2, data2, mask

.Lnot_limit:

  // Swap the byte order of diff. Exact reverse is not important, as we only need to detect
  // the half-word.
  rev     diff, diff
  // The most significant bit of DIFF marks the least significant bit of change between DATA1/2
  clz     diff, diff
  // Clear the low four bits to get a shift amount that is a multiple of 16, i.e. the bit
  // offset of the half-word containing the first difference. If diff was zero, clz yields
  // 64, which the register shifts below treat as 0.
  bfi     diff, xzr, #0, #4
  // Create a 16b mask
  mov     mask, #0xFFFF
  // Shift to the right half-word.
  lsr     data1, data1, diff
  lsr     data2, data2, diff
  // Mask the lowest half-word.
  and     data1, data1, mask
  and     data2, data2, mask
  // Compute difference.
  sub     result, data1, data2
  ret

.Lmutual_align:
  /* Sources are mutually aligned, but are not currently at an
     alignment boundary.  Round down the addresses and then mask off
     the bytes that precede the start point.  */
  bic     src1, src1, #7
  bic     src2, src2, #7
  add     limit, limit, tmp1  /* Adjust the limit for the extra.  */
  lsl     tmp1, tmp1, #3    /* Bytes beyond alignment -> bits.  */
  ldr     data1, [src1], #8
  neg     tmp1, tmp1    /* Bits to alignment -64.  */
  ldr     data2, [src2], #8
  mov     tmp2, #~0
  /* Little-endian.  Early bytes are at LSB.  */
  lsr     tmp2, tmp2, tmp1  /* Shift (tmp1 & 63).  */
  add     limit_wd, limit, #7
  /* Force the bytes that precede the start point to 0xff in both words so
     they always compare equal.  */
  orr     data1, data1, tmp2
  orr     data2, data2, tmp2
  lsr     limit_wd, limit_wd, #3  /* Number of Dwords, rounded up.  */
  b       .Lstart_realigned

.Lret0:
  mov     result, #0
  ret

  .p2align 6
.Lmisaligned8:
  /* Bias the byte count by one so that the subs below borrows (C clear)
     exactly when the last half-word has just been loaded.  */
  sub     limit, limit, #1
1:
  /* Perhaps we can do better than this.  */
  ldrh    data1w, [src1], #2
  ldrh    data2w, [src2], #2
  subs    limit, limit, #2
  /* More data left (C set): compare the half-words.  Otherwise force the
     flags to "not equal" so the loop exits.  */
  ccmp    data1w, data2w, #0, cs  /* NZCV = 0b0000.  */
  b.eq    1b
  /* Either a difference was found or the limit was reached; in the latter
     case equal half-words give 0.  */
  sub     result, data1, data2
  ret
END __memcmp16

#endif  // ART_RUNTIME_ARCH_ARM64_MEMCMP16_ARM64_S_