1// Copyright 2007 Google Inc.
2// Authors: Jeff Dean, Lincoln Smith
3//
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// You may obtain a copy of the License at
7//
8//      http://www.apache.org/licenses/LICENSE-2.0
9//
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15
16#include <config.h>
17#include "rolling_hash.h"
18#include <stdint.h>  // uint32_t
19#include <stdlib.h>  // rand, srand
20#include <vector>
21#include "testing.h"
22
23namespace open_vcdiff {
24namespace {
25
26static const uint32_t kBase = RollingHashUtil::kBase;
27
28class RollingHashSimpleTest : public testing::Test {
29 protected:
30  RollingHashSimpleTest() { }
31  virtual ~RollingHashSimpleTest() { }
32
33  void TestModBase(uint32_t operand) {
34    EXPECT_EQ(operand % kBase, RollingHashUtil::ModBase(operand));
35    EXPECT_EQ(static_cast<uint32_t>((-static_cast<int32_t>(operand)) % kBase),
36              RollingHashUtil::FindModBaseInverse(operand));
37    EXPECT_EQ(0U, RollingHashUtil::ModBase(
38        operand + RollingHashUtil::FindModBaseInverse(operand)));
39  }
40
41  void TestHashFirstTwoBytes(char first_value, char second_value) {
42    char buf[2];
43    buf[0] = first_value;
44    buf[1] = second_value;
45    EXPECT_EQ(RollingHashUtil::HashFirstTwoBytes(buf),
46              RollingHashUtil::HashStep(RollingHashUtil::HashStep(0,
47                                                                  first_value),
48                                        second_value));
49    EXPECT_EQ(RollingHashUtil::HashFirstTwoBytes(buf),
50              RollingHashUtil::HashStep(static_cast<unsigned char>(first_value),
51                                        second_value));
52  }
53};
54
55#ifdef GTEST_HAS_DEATH_TEST
56typedef RollingHashSimpleTest RollingHashDeathTest;
57#endif  // GTEST_HAS_DEATH_TEST
58
59TEST_F(RollingHashSimpleTest, KBaseIsAPowerOfTwo) {
60  EXPECT_EQ(0U, kBase & (kBase - 1));
61}
62
63TEST_F(RollingHashSimpleTest, TestModBaseForValues) {
64  TestModBase(0);
65  TestModBase(10);
66  TestModBase(static_cast<uint32_t>(-10));
67  TestModBase(kBase - 1);
68  TestModBase(kBase);
69  TestModBase(kBase + 1);
70  TestModBase(0x7FFFFFFF);
71  TestModBase(0x80000000);
72  TestModBase(0xFFFFFFFE);
73  TestModBase(0xFFFFFFFF);
74}
75
76TEST_F(RollingHashSimpleTest, VerifyHashFirstTwoBytes) {
77  TestHashFirstTwoBytes(0x00, 0x00);
78  TestHashFirstTwoBytes(0x00, 0xFF);
79  TestHashFirstTwoBytes(0xFF, 0x00);
80  TestHashFirstTwoBytes(0xFF, 0xFF);
81  TestHashFirstTwoBytes(0x00, 0x80);
82  TestHashFirstTwoBytes(0x7F, 0xFF);
83  TestHashFirstTwoBytes(0x7F, 0x80);
84  TestHashFirstTwoBytes(0x01, 0x8F);
85}
86
87#ifdef GTEST_HAS_DEATH_TEST
88TEST_F(RollingHashDeathTest, InstantiateRollingHashWithoutCallingInit) {
89  EXPECT_DEBUG_DEATH(RollingHash<16> bad_hash, "Init");
90}
91#endif  // GTEST_HAS_DEATH_TEST
92
93class RollingHashTest : public testing::Test {
94 public:
95  static const int kUpdateHashBlocks = 1000;
96  static const int kLargestBlockSize = 128;
97
98  static void MakeRandomBuffer(char* buffer, int buffer_size) {
99    for (int i = 0; i < buffer_size; ++i) {
100      buffer[i] = PortableRandomInRange<unsigned char>(0xFF);
101    }
102  }
103
104  template<int kBlockSize> static void BM_DefaultHash(int iterations,
105                                                      const char *buffer) {
106    RollingHash<kBlockSize> hasher;
107    static uint32_t result_array[kUpdateHashBlocks];
108    for (int iter = 0; iter < iterations; ++iter) {
109      for (int i = 0; i < kUpdateHashBlocks; ++i) {
110        result_array[i] = hasher.Hash(&buffer[i]);
111      }
112    }
113  }
114
115  template<int kBlockSize> static void BM_UpdateHash(int iterations,
116                                                     const char *buffer) {
117    RollingHash<kBlockSize> hasher;
118    static uint32_t result_array[kUpdateHashBlocks];
119    for (int iter = 0; iter < iterations; ++iter) {
120      uint32_t running_hash = hasher.Hash(buffer);
121      for (int i = 0; i < kUpdateHashBlocks; ++i) {
122        running_hash = hasher.UpdateHash(running_hash,
123                                         buffer[i],
124                                         buffer[i + kBlockSize]);
125        result_array[i] = running_hash;
126      }
127    }
128  }
129
130 protected:
131  static const int kUpdateHashTestIterations = 400;
132  static const int kTimingTestSize = 1 << 14;  // 16K iterations
133
134  RollingHashTest() { }
135  virtual ~RollingHashTest() { }
136
137  template<int kBlockSize> void UpdateHashMatchesHashForBlockSize() {
138    RollingHash<kBlockSize>::Init();
139    RollingHash<kBlockSize> hasher;
140    for (int x = 0; x < kUpdateHashTestIterations; ++x) {
141      int random_buffer_size =
142          PortableRandomInRange(kUpdateHashBlocks - 1) + kBlockSize;
143      MakeRandomBuffer(buffer_, random_buffer_size);
144      uint32_t running_hash = hasher.Hash(buffer_);
145      for (int i = kBlockSize; i < random_buffer_size; ++i) {
146        // UpdateHash() calculates the hash value incrementally.
147        running_hash = hasher.UpdateHash(running_hash,
148                                         buffer_[i - kBlockSize],
149                                         buffer_[i]);
150        // Hash() calculates the hash value from scratch.  Verify that both
151        // methods return the same hash value.
152        EXPECT_EQ(running_hash, hasher.Hash(&buffer_[i + 1 - kBlockSize]));
153      }
154    }
155  }
156
157  template<int kBlockSize> double DefaultHashTimingTest() {
158    // Execution time is expected to be O(kBlockSize) per hash operation,
159    // so scale the number of iterations accordingly
160    const int kTimingTestIterations = kTimingTestSize / kBlockSize;
161    CycleTimer timer;
162    timer.Start();
163    BM_DefaultHash<kBlockSize>(kTimingTestIterations, buffer_);
164    timer.Stop();
165    return static_cast<double>(timer.GetInUsec())
166        / (kTimingTestIterations * kUpdateHashBlocks);
167  }
168
169  template<int kBlockSize> double RollingTimingTest() {
170    // Execution time is expected to be O(1) per hash operation,
171    // so leave the number of iterations constant
172    const int kTimingTestIterations = kTimingTestSize;
173    CycleTimer timer;
174    timer.Start();
175    BM_UpdateHash<kBlockSize>(kTimingTestIterations, buffer_);
176    timer.Stop();
177    return static_cast<double>(timer.GetInUsec())
178        / (kTimingTestIterations * kUpdateHashBlocks);
179  }
180
181  double FindPercentage(double original, double modified) {
182    if (original < 0.0001) {
183      return 0.0;
184    } else {
185      return ((modified - original) / original) * 100.0;
186    }
187  }
188
189  template<int kBlockSize> void RunTimingTestForBlockSize() {
190    RollingHash<kBlockSize>::Init();
191    MakeRandomBuffer(buffer_, sizeof(buffer_));
192    const double time_for_default_hash = DefaultHashTimingTest<kBlockSize>();
193    const double time_for_rolling_hash = RollingTimingTest<kBlockSize>();
194    printf("%d\t%f\t%f (%f%%)\n",
195           kBlockSize,
196           time_for_default_hash,
197           time_for_rolling_hash,
198           FindPercentage(time_for_default_hash, time_for_rolling_hash));
199    CHECK_GT(time_for_default_hash, 0.0);
200    CHECK_GT(time_for_rolling_hash, 0.0);
201    if (kBlockSize > 16) {
202      EXPECT_GT(time_for_default_hash, time_for_rolling_hash);
203    }
204  }
205
206  char buffer_[kUpdateHashBlocks + kLargestBlockSize];
207};
208
209TEST_F(RollingHashTest, UpdateHashMatchesHashFromScratch) {
210  srand(1);  // test should be deterministic, including calls to rand()
211  UpdateHashMatchesHashForBlockSize<4>();
212  UpdateHashMatchesHashForBlockSize<8>();
213  UpdateHashMatchesHashForBlockSize<16>();
214  UpdateHashMatchesHashForBlockSize<32>();
215  UpdateHashMatchesHashForBlockSize<64>();
216  UpdateHashMatchesHashForBlockSize<128>();
217}
218
219TEST_F(RollingHashTest, TimingTests) {
220  srand(1);  // test should be deterministic, including calls to rand()
221  printf("BlkSize\tHash (us)\tUpdateHash (us)\n");
222  RunTimingTestForBlockSize<4>();
223  RunTimingTestForBlockSize<8>();
224  RunTimingTestForBlockSize<16>();
225  RunTimingTestForBlockSize<32>();
226  RunTimingTestForBlockSize<64>();
227  RunTimingTestForBlockSize<128>();
228}
229
230}  // anonymous namespace
231}  // namespace open_vcdiff
232