179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// Copyright 2010 Google Inc. All Rights Reserved.
279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka//
379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// Licensed under the Apache License, Version 2.0 (the "License");
479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// you may not use this file except in compliance with the License.
579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// You may obtain a copy of the License at
679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka//
779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// http://www.apache.org/licenses/LICENSE-2.0
879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka//
979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// Unless required by applicable law or agreed to in writing, software
1079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// distributed under the License is distributed on an "AS IS" BASIS,
1179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// See the License for the specific language governing permissions and
1379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// limitations under the License.
1479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka//
1579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// Function to find maximal matching prefixes of strings.
1679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka
1779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka#ifndef BROTLI_ENC_FIND_MATCH_LENGTH_H_
1879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka#define BROTLI_ENC_FIND_MATCH_LENGTH_H_
1979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka
2079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka#include <stdint.h>
2179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka
2279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka#include "./port.h"
2379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka
2479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadkanamespace brotli {
2579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka
2679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// Separate implementation for x86_64, for speed.
2779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka#if defined(__GNUC__) && defined(ARCH_K8)
2879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka
2979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadkastatic inline int FindMatchLengthWithLimit(const uint8_t* s1,
3079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka                                           const uint8_t* s2,
3179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka                                           size_t limit) {
3279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  int matched = 0;
3379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  size_t limit2 = (limit >> 3) + 1;  // + 1 is for pre-decrement in while
3479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  while (PREDICT_TRUE(--limit2)) {
3579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    if (PREDICT_FALSE(BROTLI_UNALIGNED_LOAD64(s2) ==
3679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka                      BROTLI_UNALIGNED_LOAD64(s1 + matched))) {
3779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka      s2 += 8;
3879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka      matched += 8;
3979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    } else {
4079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka      uint64_t x =
4179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka          BROTLI_UNALIGNED_LOAD64(s2) ^ BROTLI_UNALIGNED_LOAD64(s1 + matched);
4279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka      int matching_bits =  __builtin_ctzll(x);
4379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka      matched += matching_bits >> 3;
4479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka      return matched;
4579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    }
4679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  }
4779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  limit = (limit & 7) + 1;  // + 1 is for pre-decrement in while
4879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  while (--limit) {
4979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    if (PREDICT_TRUE(s1[matched] == *s2)) {
5079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka      ++s2;
5179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka      ++matched;
5279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    } else {
5379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka      return matched;
5479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    }
5579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  }
5679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  return matched;
5779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka}
5879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka#else
5979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadkastatic inline int FindMatchLengthWithLimit(const uint8_t* s1,
6079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka                                           const uint8_t* s2,
6179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka                                           size_t limit) {
6279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  int matched = 0;
6379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  const uint8_t* s2_limit = s2 + limit;
6479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  const uint8_t* s2_ptr = s2;
6579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  // Find out how long the match is. We loop over the data 32 bits at a
6679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  // time until we find a 32-bit block that doesn't match; then we find
6779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  // the first non-matching bit and use that to calculate the total
6879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  // length of the match.
6979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  while (s2_ptr <= s2_limit - 4 &&
7079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka         BROTLI_UNALIGNED_LOAD32(s2_ptr) ==
7179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka         BROTLI_UNALIGNED_LOAD32(s1 + matched)) {
7279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    s2_ptr += 4;
7379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    matched += 4;
7479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  }
7579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  while ((s2_ptr < s2_limit) && (s1[matched] == *s2_ptr)) {
7679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    ++s2_ptr;
7779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    ++matched;
7879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  }
7979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  return matched;
8079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka}
8179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka#endif
8279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka
8379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka}  // namespace brotli
8479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka
8579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka#endif  // BROTLI_ENC_FIND_MATCH_LENGTH_H_
86