1// Copyright 2008 Google Inc.
2// Author: Lincoln Smith
3//
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// You may obtain a copy of the License at
7//
8//      http://www.apache.org/licenses/LICENSE-2.0
9//
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15
16#include <config.h>
17#include "blockhash.h"
18#include <limits.h>  // INT_MIN
19#include <string.h>  // memcpy, memcmp, strlen
20#include <iostream>
21#include <memory>  // auto_ptr
22#include "encodetable.h"
23#include "rolling_hash.h"
24#include "testing.h"
25
26namespace open_vcdiff {
27
28const int kBlockSize = BlockHash::kBlockSize;
29
30class BlockHashTest : public testing::Test {
31 protected:
32  static const int kTimingTestSize = 1 << 21;  // 2M
33  static const int kTimingTestIterations = 32;
34
35  BlockHashTest() {
36    dh_.reset(BlockHash::CreateDictionaryHash(sample_text,
37                                              strlen(sample_text)));
38    th_.reset(BlockHash::CreateTargetHash(sample_text, strlen(sample_text), 0));
39    EXPECT_TRUE(dh_.get() != NULL);
40    EXPECT_TRUE(th_.get() != NULL);
41  }
42
43  // BlockHashTest is a friend to BlockHash.  Expose the protected functions
44  // that will be tested by the children of BlockHashTest.
45  static bool BlockContentsMatch(const char* block1, const char* block2) {
46    return BlockHash::BlockContentsMatch(block1, block2);
47  }
48
49  int FirstMatchingBlock(const BlockHash& block_hash,
50                         uint32_t hash_value,
51                         const char* block_ptr) const {
52    return block_hash.FirstMatchingBlock(hash_value, block_ptr);
53  }
54
55  int NextMatchingBlock(const BlockHash& block_hash,
56                        int block_number,
57                        const char* block_ptr) const {
58    return block_hash.NextMatchingBlock(block_number, block_ptr);
59  }
60
61  static int MatchingBytesToLeft(const char* source_match_start,
62                                 const char* target_match_start,
63                                 int max_bytes) {
64    return BlockHash::MatchingBytesToLeft(source_match_start,
65                                          target_match_start,
66                                          max_bytes);
67  }
68
69  static int MatchingBytesToRight(const char* source_match_end,
70                                  const char* target_match_end,
71                                  int max_bytes) {
72    return BlockHash::MatchingBytesToRight(source_match_end,
73                                           target_match_end,
74                                           max_bytes);
75  }
76
77  static int StringLengthAsInt(const char* s) {
78    return static_cast<int>(strlen(s));
79  }
80
81  void InitBlocksToDifferAtNthByte(int n) {
82    CHECK(n < kBlockSize);
83    memset(compare_buffer_1_, 0xBE, kTimingTestSize);
84    memset(compare_buffer_2_, 0xBE, kTimingTestSize);
85    for (int index = n; index < kTimingTestSize; index += kBlockSize) {
86      compare_buffer_1_[index] = 0x00;
87      compare_buffer_2_[index] = 0x01;
88    }
89  }
90
91  void TestAndPrintTimesForCompareFunctions(bool should_be_identical);
92
93  void TimingTestForBlocksThatDifferAtByte(int n) {
94    InitBlocksToDifferAtNthByte(n);
95    std::cout << "Comparing blocks that differ at byte " << n << std::endl;
96    TestAndPrintTimesForCompareFunctions(false);
97  }
98
99  // Copy sample_text_without_spaces and search_string_without_spaces
100  // into newly allocated sample_text and search_string buffers,
101  // but pad them with space characters so that every character
102  // in sample_text_without_spaces matches (kBlockSize - 1)
103  // space characters in sample_text, followed by that character.
104  // For example:
105  // Since sample_text_without_spaces begins "The only thing"...,
106  // if kBlockSize is 4, then 3 space characters will be inserted
107  // between each letter of sample_text, as follows:
108  // "   T   h   e       o   n   l   y       t   h   i   n   g"...
109  // This makes testing simpler, because finding a kBlockSize-byte match
110  // between the sample text and search string only depends on the
111  // trailing letter in each block.
112  static void MakeEachLetterABlock(const char* string_without_spaces,
113                                   const char** result) {
114    const size_t length_without_spaces = strlen(string_without_spaces);
115    char* padded_text = new char[(kBlockSize * length_without_spaces) + 1];
116    memset(padded_text, ' ', kBlockSize * length_without_spaces);
117    char* padded_text_ptr = padded_text + (kBlockSize - 1);
118    for (size_t i = 0; i < length_without_spaces; ++i) {
119      *padded_text_ptr = string_without_spaces[i];
120      padded_text_ptr += kBlockSize;
121    }
122    padded_text[kBlockSize * length_without_spaces] = '\0';
123    *result = padded_text;
124  }
125
126  static void SetUpTestCase() {
127    MakeEachLetterABlock(sample_text_without_spaces, &sample_text);
128    MakeEachLetterABlock(search_string_without_spaces, &search_string);
129    MakeEachLetterABlock(search_string_altered_without_spaces,
130                         &search_string_altered);
131    MakeEachLetterABlock(search_to_end_without_spaces, &search_to_end_string);
132    MakeEachLetterABlock(search_to_beginning_without_spaces,
133                         &search_to_beginning_string);
134    MakeEachLetterABlock(sample_text_many_matches_without_spaces,
135                         &sample_text_many_matches);
136    MakeEachLetterABlock(search_string_many_matches_without_spaces,
137                         &search_string_many_matches);
138    MakeEachLetterABlock("y", &test_string_y);
139    MakeEachLetterABlock("e", &test_string_e);
140    char* new_test_string_unaligned_e = new char[kBlockSize];
141    memset(new_test_string_unaligned_e, ' ', kBlockSize);
142    new_test_string_unaligned_e[kBlockSize - 2] = 'e';
143    test_string_unaligned_e = new_test_string_unaligned_e;
144    char* new_test_string_all_Qs = new char[kBlockSize];
145    memset(new_test_string_all_Qs, 'Q', kBlockSize);
146    test_string_all_Qs = new_test_string_all_Qs;
147    hashed_y = RollingHash<kBlockSize>::Hash(test_string_y);
148    hashed_e = RollingHash<kBlockSize>::Hash(test_string_e);
149    hashed_f =
150        RollingHash<kBlockSize>::Hash(&search_string[index_of_f_in_fearsome]);
151    hashed_unaligned_e = RollingHash<kBlockSize>::Hash(test_string_unaligned_e);
152    hashed_all_Qs = RollingHash<kBlockSize>::Hash(test_string_all_Qs);
153  }
154
155  static void TearDownTestCase() {
156    delete[] sample_text;
157    delete[] search_string;
158    delete[] search_string_altered;
159    delete[] search_to_end_string;
160    delete[] search_to_beginning_string;
161    delete[] sample_text_many_matches;
162    delete[] search_string_many_matches;
163    delete[] test_string_y;
164    delete[] test_string_e;
165    delete[] test_string_unaligned_e;
166    delete[] test_string_all_Qs;
167  }
168
169  // Each block in the sample text and search string is kBlockSize bytes long,
170  // and consists of (kBlockSize - 1) space characters
171  // followed by a single letter of text.
172
173  // Block numbers of certain characters within the sample text:
174  // All six occurrences of "e", in order.
175  static const int block_of_first_e = 2;
176  static const int block_of_second_e = 16;
177  static const int block_of_third_e = 21;
178  static const int block_of_fourth_e = 27;
179  static const int block_of_fifth_e = 35;
180  static const int block_of_sixth_e = 42;
181
182  static const int block_of_y_in_only = 7;
183  // The block number is multiplied by kBlockSize to arrive at the
184  // index, which points to the (kBlockSize - 1) space characters before
185  // the letter specified.
186  // Indices of certain characters within the sample text.
187  static const int index_of_first_e = block_of_first_e * kBlockSize;
188  static const int index_of_fourth_e = block_of_fourth_e * kBlockSize;
189  static const int index_of_sixth_e = block_of_sixth_e * kBlockSize;
190  static const int index_of_y_in_only = block_of_y_in_only * kBlockSize;
191  static const int index_of_space_before_fear_is_fear = 25 * kBlockSize;
192  static const int index_of_longest_match_ear_is_fear = 27 * kBlockSize;
193  static const int index_of_i_in_fear_is_fear = 31 * kBlockSize;
194  static const int index_of_space_before_fear_itself = 33 * kBlockSize;
195  static const int index_of_space_before_itself = 38 * kBlockSize;
196  static const int index_of_ababc = 4 * kBlockSize;
197
198  // Indices of certain characters within the search strings.
199  static const int index_of_second_w_in_what_we = 5 * kBlockSize;
200  static const int index_of_second_e_in_what_we_hear = 9 * kBlockSize;
201  static const int index_of_f_in_fearsome = 16 * kBlockSize;
202  static const int index_of_space_in_eat_itself =  12 * kBlockSize;
203  static const int index_of_i_in_itself = 13 * kBlockSize;
204  static const int index_of_t_in_use_the = 4 * kBlockSize;
205  static const int index_of_o_in_online = 8 * kBlockSize;
206
207  static const char sample_text_without_spaces[];
208  static const char search_string_without_spaces[];
209  static const char search_string_altered_without_spaces[];
210  static const char search_to_end_without_spaces[];
211  static const char search_to_beginning_without_spaces[];
212  static const char sample_text_many_matches_without_spaces[];
213  static const char search_string_many_matches_without_spaces[];
214
215  static const char* sample_text;
216  static const char* search_string;
217  static const char* search_string_altered;
218  static const char* search_to_end_string;
219  static const char* search_to_beginning_string;
220  static const char* sample_text_many_matches;
221  static const char* search_string_many_matches;
222
223  static const char* test_string_y;
224  static const char* test_string_e;
225  static const char* test_string_all_Qs;
226  static const char* test_string_unaligned_e;
227
228  static uint32_t hashed_y;
229  static uint32_t hashed_e;
230  static uint32_t hashed_f;
231  static uint32_t hashed_unaligned_e;
232  static uint32_t hashed_all_Qs;
233
234  // Boost scoped_ptr, if available, could be used instead of std::auto_ptr.
235  std::auto_ptr<const BlockHash> dh_;  // hash table is populated at startup
236  std::auto_ptr<BlockHash> th_;  // hash table not populated;
237                                // used to test incremental adds
238
239  BlockHash::Match best_match_;
240  char* compare_buffer_1_;
241  char* compare_buffer_2_;
242  int prime_result_;
243};
244
245#ifdef GTEST_HAS_DEATH_TEST
246typedef BlockHashTest BlockHashDeathTest;
247#endif  // GTEST_HAS_DEATH_TEST
248
249// The C++ standard requires a separate definition of these static const values,
250// even though their initializers are given within the class definition.
251const int BlockHashTest::block_of_first_e;
252const int BlockHashTest::block_of_second_e;
253const int BlockHashTest::block_of_third_e;
254const int BlockHashTest::block_of_fourth_e;
255const int BlockHashTest::block_of_fifth_e;
256const int BlockHashTest::block_of_sixth_e;
257const int BlockHashTest::block_of_y_in_only;
258const int BlockHashTest::index_of_first_e;
259const int BlockHashTest::index_of_fourth_e;
260const int BlockHashTest::index_of_sixth_e;
261const int BlockHashTest::index_of_y_in_only;
262const int BlockHashTest::index_of_space_before_fear_is_fear;
263const int BlockHashTest::index_of_longest_match_ear_is_fear;
264const int BlockHashTest::index_of_i_in_fear_is_fear;
265const int BlockHashTest::index_of_space_before_fear_itself;
266const int BlockHashTest::index_of_space_before_itself;
267const int BlockHashTest::index_of_ababc;
268const int BlockHashTest::index_of_second_w_in_what_we;
269const int BlockHashTest::index_of_second_e_in_what_we_hear;
270const int BlockHashTest::index_of_f_in_fearsome;
271const int BlockHashTest::index_of_space_in_eat_itself;
272const int BlockHashTest::index_of_i_in_itself;
273const int BlockHashTest::index_of_t_in_use_the;
274const int BlockHashTest::index_of_o_in_online;
275
276const char BlockHashTest::sample_text_without_spaces[] =
277    "The only thing we have to fear is fear itself";
278
279const char BlockHashTest::search_string_without_spaces[] =
280    "What we hear is fearsome";
281
282const char BlockHashTest::search_string_altered_without_spaces[] =
283    "Vhat ve hear is fearsomm";
284
285const char BlockHashTest::search_to_end_without_spaces[] =
286    "Pop will eat itself, eventually";
287
288const char BlockHashTest::search_to_beginning_without_spaces[] =
289    "Use The online dictionary";
290
291const char BlockHashTest::sample_text_many_matches_without_spaces[] =
292    "ababababcab";
293
294const char BlockHashTest::search_string_many_matches_without_spaces[] =
295    "ababc";
296
297const char* BlockHashTest::sample_text = NULL;
298const char* BlockHashTest::search_string = NULL;
299const char* BlockHashTest::search_string_altered = NULL;
300const char* BlockHashTest::search_to_end_string = NULL;
301const char* BlockHashTest::search_to_beginning_string = NULL;
302const char* BlockHashTest::sample_text_many_matches = NULL;
303const char* BlockHashTest::search_string_many_matches = NULL;
304
305const char* BlockHashTest::test_string_y = NULL;
306const char* BlockHashTest::test_string_e = NULL;
307const char* BlockHashTest::test_string_unaligned_e = NULL;
308const char* BlockHashTest::test_string_all_Qs = NULL;
309
310uint32_t BlockHashTest::hashed_y = 0;
311uint32_t BlockHashTest::hashed_e = 0;
312uint32_t BlockHashTest::hashed_f = 0;
313uint32_t BlockHashTest::hashed_unaligned_e = 0;
314uint32_t BlockHashTest::hashed_all_Qs = 0;
315
316void BlockHashTest::TestAndPrintTimesForCompareFunctions(
317    bool should_be_identical) {
318  CHECK(compare_buffer_1_ != NULL);
319  CHECK(compare_buffer_2_ != NULL);
320  // Prime the memory cache.
321  prime_result_ =
322      memcmp(compare_buffer_1_, compare_buffer_2_, kTimingTestSize);
323  const char* const block1_limit =
324      &compare_buffer_1_[kTimingTestSize - kBlockSize];
325  int block_compare_words_result = 0;
326  CycleTimer block_compare_words_timer;
327  block_compare_words_timer.Start();
328  for (int i = 0; i < kTimingTestIterations; ++i) {
329    const char* block1 = compare_buffer_1_;
330    const char* block2 = compare_buffer_2_;
331    while (block1 < block1_limit) {
332      if (!BlockHash::BlockCompareWords(block1, block2)) {
333        ++block_compare_words_result;
334      }
335      block1 += kBlockSize;
336      block2 += kBlockSize;
337    }
338  }
339  block_compare_words_timer.Stop();
340  double time_for_block_compare_words =
341      static_cast<double>(block_compare_words_timer.GetInUsec())
342      / ((kTimingTestSize / kBlockSize) * kTimingTestIterations);
343  int block_contents_match_result = 0;
344  CycleTimer block_contents_match_timer;
345  block_contents_match_timer.Start();
346  for (int i = 0; i < kTimingTestIterations; ++i) {
347    const char* block1 = compare_buffer_1_;
348    const char* block2 = compare_buffer_2_;
349    while (block1 < block1_limit) {
350      if (!BlockHash::BlockContentsMatch(block1, block2)) {
351        ++block_contents_match_result;
352      }
353      block1 += kBlockSize;
354      block2 += kBlockSize;
355    }
356  }
357  block_contents_match_timer.Stop();
358  double time_for_block_contents_match =
359      static_cast<double>(block_contents_match_timer.GetInUsec())
360      / ((kTimingTestSize / kBlockSize) * kTimingTestIterations);
361  EXPECT_EQ(block_contents_match_result, block_compare_words_result);
362  if (should_be_identical) {
363    CHECK_EQ(0, block_compare_words_result);
364  } else {
365    CHECK_GT(block_compare_words_result, 0);
366  }
367  std::cout << "BlockHash::BlockCompareWords: "
368            << time_for_block_compare_words << " us per operation" << std::endl;
369  std::cout << "BlockHash::BlockContentsMatch: "
370            << time_for_block_contents_match << " us per operation"
371            << std::endl;
372  if (time_for_block_compare_words > 0) {
373    double percent_change =
374        (((time_for_block_contents_match - time_for_block_compare_words)
375          / time_for_block_compare_words) * 100.0);
376    if (percent_change >= 0.0) {
377      std::cout << "BlockContentsMatch is " << percent_change << "%"
378                << " SLOWER than BlockCompareWords" << std::endl;
379    } else {
380      std::cout << "BlockContentsMatch is " << (-percent_change) << "%"
381                << " FASTER than BlockCompareWords" << std::endl;
382    }
383  }
384#if defined(NDEBUG) && !defined(VCDIFF_USE_BLOCK_COMPARE_WORDS)
385  // Only check timings for optimized build.  There's plenty of margin: this
386  // check will fail only if BlockContentsMatch is at least twice as slow as
387  // BlockCompareWords.
388  EXPECT_GT(time_for_block_compare_words * 2.0, time_for_block_contents_match);
389#endif  // NDEBUG && !VCDIFF_USE_BLOCK_COMPARE_WORDS
390}
391
392// The two strings passed to BlockHash::MatchingBytesToLeft do have matching
393// characters -- in fact, they're the same string -- but since max_bytes is zero
394// or negative, BlockHash::MatchingBytesToLeft should not read from the strings
395// and should return 0.
396TEST_F(BlockHashTest, MaxBytesZeroDoesNothing) {
397  EXPECT_EQ(0, MatchingBytesToLeft(
398      &search_string[index_of_f_in_fearsome],
399      &search_string[index_of_f_in_fearsome],
400      0));
401  EXPECT_EQ(0, MatchingBytesToRight(
402      &search_string[index_of_f_in_fearsome],
403      &search_string[index_of_f_in_fearsome],
404      0));
405}
406
407TEST_F(BlockHashTest, MaxBytesNegativeDoesNothing) {
408  EXPECT_EQ(0, MatchingBytesToLeft(
409      &search_string[index_of_f_in_fearsome],
410      &search_string[index_of_f_in_fearsome],
411      -1));
412  EXPECT_EQ(0, MatchingBytesToLeft(
413      &search_string[index_of_f_in_fearsome],
414      &search_string[index_of_f_in_fearsome],
415      INT_MIN));
416  EXPECT_EQ(0, MatchingBytesToRight(
417      &search_string[index_of_f_in_fearsome],
418      &search_string[index_of_f_in_fearsome],
419      -1));
420  EXPECT_EQ(0, MatchingBytesToRight(
421      &search_string[index_of_f_in_fearsome],
422      &search_string[index_of_f_in_fearsome],
423      INT_MIN));
424}
425
426TEST_F(BlockHashTest, MaxBytesOneMatch) {
427  EXPECT_EQ(1, MatchingBytesToLeft(
428      &search_string[index_of_f_in_fearsome],
429      &search_string[index_of_f_in_fearsome],
430      1));
431  EXPECT_EQ(1, MatchingBytesToRight(
432      &search_string[index_of_f_in_fearsome],
433      &search_string[index_of_f_in_fearsome],
434      1));
435}
436
437TEST_F(BlockHashTest, MaxBytesOneNoMatch) {
438  EXPECT_EQ(0, MatchingBytesToLeft(
439      &search_string[index_of_f_in_fearsome],
440      &search_string[index_of_second_e_in_what_we_hear],
441      1));
442  EXPECT_EQ(0, MatchingBytesToRight(
443      &search_string[index_of_f_in_fearsome],
444      &search_string[index_of_second_e_in_what_we_hear - 1],
445      1));
446}
447
448TEST_F(BlockHashTest, LeftLimitedByMaxBytes) {
449  // The number of bytes that match between the original "we hear is fearsome"
450  // and the altered "ve hear is fearsome".
451  const int expected_length = kBlockSize * StringLengthAsInt("e hear is ");
452  const int max_bytes = expected_length - 1;
453  EXPECT_EQ(max_bytes, MatchingBytesToLeft(
454      &search_string[index_of_f_in_fearsome],
455      &search_string_altered[index_of_f_in_fearsome],
456      max_bytes));
457}
458
459TEST_F(BlockHashTest, LeftNotLimited) {
460  // The number of bytes that match between the original "we hear is fearsome"
461  // and the altered "ve hear is fearsome".
462  const int expected_length = kBlockSize * StringLengthAsInt("e hear is ");
463  const int max_bytes = expected_length + 1;
464  EXPECT_EQ(expected_length, MatchingBytesToLeft(
465      &search_string[index_of_f_in_fearsome],
466      &search_string_altered[index_of_f_in_fearsome],
467      max_bytes));
468  EXPECT_EQ(expected_length, MatchingBytesToLeft(
469      &search_string[index_of_f_in_fearsome],
470      &search_string_altered[index_of_f_in_fearsome],
471      INT_MAX));
472}
473
474TEST_F(BlockHashTest, RightLimitedByMaxBytes) {
475  // The number of bytes that match between the original "fearsome"
476  // and the altered "fearsomm".
477  const int expected_length = (kBlockSize * StringLengthAsInt("fearsom"))
478                              + (kBlockSize - 1);  // spacing between letters
479  const int max_bytes = expected_length - 1;
480  EXPECT_EQ(max_bytes, MatchingBytesToRight(
481      &search_string[index_of_f_in_fearsome],
482      &search_string_altered[index_of_f_in_fearsome],
483      max_bytes));
484}
485
486TEST_F(BlockHashTest, RightNotLimited) {
487  // The number of bytes that match between the original "we hear is fearsome"
488  // and the altered "ve hear is fearsome".
489  const int expected_length = (kBlockSize * StringLengthAsInt("fearsom"))
490                              + (kBlockSize - 1);  // spacing between letters
491  const int max_bytes = expected_length + 1;
492  EXPECT_EQ(expected_length, MatchingBytesToRight(
493      &search_string[index_of_f_in_fearsome],
494      &search_string_altered[index_of_f_in_fearsome],
495      max_bytes));
496  EXPECT_EQ(expected_length, MatchingBytesToRight(
497      &search_string[index_of_f_in_fearsome],
498      &search_string_altered[index_of_f_in_fearsome],
499      INT_MAX));
500}
501
502// If this test fails in a non-x86 or non-gcc environment, consider adding
503// -DVCDIFF_USE_BLOCK_COMPARE_WORDS to AM_CXXFLAGS in Makefile.am and
504// Makefile.in, and reconstructing the Makefile.  That will cause blockhash.cc
505// to use a special implementation (BlockCompareWords) to compare blocks
506// rather than using standard memcmp.
507TEST_F(BlockHashTest, BlockContentsMatchIsAsFastAsBlockCompareWords) {
508  compare_buffer_1_ = new char[kTimingTestSize];
509  compare_buffer_2_ = new char[kTimingTestSize];
510
511  // The value 0xBE is arbitrarily chosen.  First test with identical contents
512  // in the buffers, so that the comparison functions cannot short-circuit
513  // and will return true.
514  memset(compare_buffer_1_, 0xBE, kTimingTestSize);
515  memset(compare_buffer_2_, 0xBE, kTimingTestSize);
516  std::cout << "Comparing "
517            << (kTimingTestSize / kBlockSize) << " identical values:"
518            << std::endl;
519  TestAndPrintTimesForCompareFunctions(true);
520
521  // Now change one value in the middle of one buffer, so that the contents
522  // are no longer the same.
523  compare_buffer_1_[kTimingTestSize / 2] = 0x00;
524  std::cout << "Comparing "
525            << ((kTimingTestSize / kBlockSize) - 1) << " identical values"
526            << " and one mismatch:" << std::endl;
527  TestAndPrintTimesForCompareFunctions(false);
528
529  // Set one of the bytes of each block to differ so that
530  // none of the compare operations will return true, and run timing tests.
531  // In practice, BlockHash::BlockContentsMatch will only be called
532  // for two blocks whose hash values match, and the two most important
533  // cases are: (1) the blocks are identical, or (2) none of their bytes match.
534  TimingTestForBlocksThatDifferAtByte(0);
535  TimingTestForBlocksThatDifferAtByte(1);
536  TimingTestForBlocksThatDifferAtByte(kBlockSize / 2);
537  TimingTestForBlocksThatDifferAtByte(kBlockSize - 1);
538
539  delete[] compare_buffer_1_;
540  delete[] compare_buffer_2_;
541}
542
543TEST_F(BlockHashTest, FindFailsBeforeHashing) {
544  EXPECT_EQ(-1, FirstMatchingBlock(*th_, hashed_y, test_string_y));
545}
546
547TEST_F(BlockHashTest, HashOneFindOne) {
548  for (int i = 0; i <= index_of_y_in_only; ++i) {
549    th_->AddOneIndexHash(i, RollingHash<kBlockSize>::Hash(&sample_text[i]));
550  }
551  EXPECT_EQ(block_of_y_in_only, FirstMatchingBlock(*th_, hashed_y,
552                                                   test_string_y));
553  EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_y_in_only, test_string_y));
554}
555
556TEST_F(BlockHashTest, HashAllFindOne) {
557  EXPECT_EQ(block_of_y_in_only, FirstMatchingBlock(*dh_, hashed_y,
558                                                   test_string_y));
559  EXPECT_EQ(-1, NextMatchingBlock(*dh_, block_of_y_in_only, test_string_y));
560}
561
562TEST_F(BlockHashTest, NonMatchingTextNotFound) {
563  EXPECT_EQ(-1, FirstMatchingBlock(*dh_, hashed_all_Qs, test_string_all_Qs));
564}
565
566// Search for unaligned text.  The test string is contained in the
567// sample text (unlike the non-matching string in NonMatchingTextNotFound,
568// above), but it is not aligned on a block boundary.  FindMatchingBlock
569// will only work if the test string is aligned on a block boundary.
570//
571//    "   T   h   e       o   n   l   y"
572//              ^^^^ Here is the test string
573//
574TEST_F(BlockHashTest, UnalignedTextNotFound) {
575  EXPECT_EQ(-1, FirstMatchingBlock(*dh_, hashed_unaligned_e,
576                                   test_string_unaligned_e));
577}
578
579TEST_F(BlockHashTest, FindSixMatches) {
580  EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*dh_, hashed_e,
581                                                 test_string_e));
582  EXPECT_EQ(block_of_second_e, NextMatchingBlock(*dh_, block_of_first_e,
583                                                 test_string_e));
584  EXPECT_EQ(block_of_third_e, NextMatchingBlock(*dh_, block_of_second_e,
585                                                test_string_e));
586  EXPECT_EQ(block_of_fourth_e, NextMatchingBlock(*dh_, block_of_third_e,
587                                                 test_string_e));
588  EXPECT_EQ(block_of_fifth_e, NextMatchingBlock(*dh_, block_of_fourth_e,
589                                                test_string_e));
590  EXPECT_EQ(block_of_sixth_e, NextMatchingBlock(*dh_, block_of_fifth_e,
591                                                test_string_e));
592  EXPECT_EQ(-1, NextMatchingBlock(*dh_, block_of_sixth_e, test_string_e));
593
594  // Starting over gives same result
595  EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*dh_, hashed_e,
596                                                 test_string_e));
597}
598
599TEST_F(BlockHashTest, AddRangeFindThreeMatches) {
600  // Add hash values only for those characters before the fourth instance
601  // of "e" in the sample text.  Tests that the ending index
602  // of AddAllBlocksThroughIndex() is not inclusive: only three matches
603  // for "e" should be found.
604  th_->AddAllBlocksThroughIndex(index_of_fourth_e);
605  EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
606                                                 test_string_e));
607  EXPECT_EQ(block_of_second_e, NextMatchingBlock(*th_, block_of_first_e,
608                                                 test_string_e));
609  EXPECT_EQ(block_of_third_e, NextMatchingBlock(*th_, block_of_second_e,
610                                                test_string_e));
611  EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_third_e, test_string_e));
612
613  // Starting over gives same result
614  EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
615                                                 test_string_e));
616}
617
618// Try indices that are not even multiples of the block size.
619// Add three ranges and verify the results after each
620// call to AddAllBlocksThroughIndex().
621TEST_F(BlockHashTest, AddRangeWithUnalignedIndices) {
622  th_->AddAllBlocksThroughIndex(index_of_first_e + 1);
623  EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
624                                                 test_string_e));
625  EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_first_e, test_string_e));
626
627  // Starting over gives same result
628  EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
629                                                 test_string_e));
630
631  // Add the second range to expand the result set
632  th_->AddAllBlocksThroughIndex(index_of_fourth_e - 3);
633  EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
634                                                 test_string_e));
635  EXPECT_EQ(block_of_second_e, NextMatchingBlock(*th_, block_of_first_e,
636                                                 test_string_e));
637  EXPECT_EQ(block_of_third_e, NextMatchingBlock(*th_, block_of_second_e,
638                                                test_string_e));
639  EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_third_e, test_string_e));
640
641  // Starting over gives same result
642  EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
643                                                 test_string_e));
644
645  // Add the third range to expand the result set
646  th_->AddAllBlocksThroughIndex(index_of_fourth_e + 1);
647
648  EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
649                                                 test_string_e));
650  EXPECT_EQ(block_of_second_e, NextMatchingBlock(*th_, block_of_first_e,
651                                                 test_string_e));
652  EXPECT_EQ(block_of_third_e, NextMatchingBlock(*th_, block_of_second_e,
653                                                test_string_e));
654  EXPECT_EQ(block_of_fourth_e, NextMatchingBlock(*th_, block_of_third_e,
655                                                 test_string_e));
656  EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_fourth_e, test_string_e));
657
658  // Starting over gives same result
659  EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
660                                                 test_string_e));
661}
662
663#ifdef GTEST_HAS_DEATH_TEST
664TEST_F(BlockHashDeathTest, AddingRangesInDescendingOrderNoEffect) {
665  th_->AddAllBlocksThroughIndex(index_of_fourth_e + 1);
666
667  EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
668                                                 test_string_e));
669  EXPECT_EQ(block_of_second_e, NextMatchingBlock(*th_, block_of_first_e,
670                                                 test_string_e));
671  EXPECT_EQ(block_of_third_e, NextMatchingBlock(*th_, block_of_second_e,
672                                                test_string_e));
673  EXPECT_EQ(block_of_fourth_e, NextMatchingBlock(*th_, block_of_third_e,
674                                                 test_string_e));
675  EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_fourth_e, test_string_e));
676
677  // Starting over gives same result
678  EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
679                                                 test_string_e));
680
681  // These calls will produce DFATAL error messages, and will do nothing,
682  // since the ranges have already been added.
683  EXPECT_DEBUG_DEATH(th_->AddAllBlocksThroughIndex(index_of_fourth_e - 3),
684                     "<");
685  EXPECT_DEBUG_DEATH(th_->AddAllBlocksThroughIndex(index_of_first_e + 1),
686                     "<");
687
688  EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
689                                                 test_string_e));
690  EXPECT_EQ(block_of_second_e, NextMatchingBlock(*th_, block_of_first_e,
691                                                 test_string_e));
692  EXPECT_EQ(block_of_third_e, NextMatchingBlock(*th_, block_of_second_e,
693                                                test_string_e));
694  EXPECT_EQ(block_of_fourth_e, NextMatchingBlock(*th_, block_of_third_e,
695                                                 test_string_e));
696  EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_fourth_e, test_string_e));
697
698  // Starting over gives same result
699  EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
700                                                 test_string_e));
701}
702#endif  // GTEST_HAS_DEATH_TEST
703
704TEST_F(BlockHashTest, AddEntireRangeFindSixMatches) {
705  th_->AddAllBlocksThroughIndex(StringLengthAsInt(sample_text));
706  EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
707                                                 test_string_e));
708  EXPECT_EQ(block_of_second_e, NextMatchingBlock(*th_, block_of_first_e,
709                                                 test_string_e));
710  EXPECT_EQ(block_of_third_e, NextMatchingBlock(*th_, block_of_second_e,
711                                                test_string_e));
712  EXPECT_EQ(block_of_fourth_e, NextMatchingBlock(*th_, block_of_third_e,
713                                                 test_string_e));
714  EXPECT_EQ(block_of_fifth_e, NextMatchingBlock(*th_, block_of_fourth_e,
715                                                test_string_e));
716  EXPECT_EQ(block_of_sixth_e, NextMatchingBlock(*th_, block_of_fifth_e,
717                                                test_string_e));
718  EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_sixth_e, test_string_e));
719
720  // Starting over gives same result
721  EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
722                                                 test_string_e));
723}
724
725TEST_F(BlockHashTest, ZeroSizeSourceAccepted) {
726  BlockHash zero_sized_hash(sample_text, 0, 0);
727  EXPECT_EQ(true, zero_sized_hash.Init(true));
728  EXPECT_EQ(-1, FirstMatchingBlock(*th_, hashed_y, test_string_y));
729}
730
731#ifdef GTEST_HAS_DEATH_TEST
732TEST_F(BlockHashDeathTest, BadNextMatchingBlockReturnsNoMatch) {
733  EXPECT_DEBUG_DEATH(EXPECT_EQ(-1, NextMatchingBlock(*dh_, 0xFFFFFFFE, "    ")),
734                     "invalid");
735}
736
737TEST_F(BlockHashDeathTest, CallingInitTwiceIsIllegal) {
738  BlockHash bh(sample_text, strlen(sample_text), 0);
739  EXPECT_TRUE(bh.Init(false));
740  EXPECT_DEBUG_DEATH(EXPECT_FALSE(bh.Init(false)), "twice");
741}
742
743TEST_F(BlockHashDeathTest, CallingAddBlockBeforeInitIsIllegal) {
744  BlockHash bh(sample_text, strlen(sample_text), 0);
745  EXPECT_DEBUG_DEATH(bh.AddAllBlocksThroughIndex(index_of_first_e),
746                     "called before");
747}
748
749TEST_F(BlockHashDeathTest, AddAllBlocksThroughIndexOutOfRange) {
750  EXPECT_DEBUG_DEATH(
751      th_->AddAllBlocksThroughIndex(static_cast<int>(strlen(sample_text) + 1)),
752      "higher than end");
753}
754#endif  // GTEST_HAS_DEATH_TEST
755
756TEST_F(BlockHashTest, UnknownFingerprintReturnsNoMatch) {
757  EXPECT_EQ(-1, FirstMatchingBlock(*dh_, 0xFAFAFAFA, "FAFA"));
758}
759
760TEST_F(BlockHashTest, FindBestMatch) {
761  dh_->FindBestMatch(hashed_f,
762                     &search_string[index_of_f_in_fearsome],
763                     search_string,
764                     strlen(search_string),
765                     &best_match_);
766  EXPECT_EQ(index_of_longest_match_ear_is_fear, best_match_.source_offset());
767  EXPECT_EQ(index_of_second_e_in_what_we_hear, best_match_.target_offset());
768  // The match includes the spaces after the final character,
769  // which is why (kBlockSize - 1) is added to the expected best size.
770  EXPECT_EQ((strlen("ear is fear") * kBlockSize) + (kBlockSize - 1),
771            best_match_.size());
772}
773
774TEST_F(BlockHashTest, FindBestMatchWithStartingOffset) {
775  BlockHash th2(sample_text, strlen(sample_text), 0x10000);
776  th2.Init(true);  // hash all blocks
777  th2.FindBestMatch(hashed_f,
778                    &search_string[index_of_f_in_fearsome],
779                    search_string,
780                    strlen(search_string),
781                    &best_match_);
782  // Offset should begin with dictionary_size
783  EXPECT_EQ(0x10000 + (index_of_longest_match_ear_is_fear),
784            best_match_.source_offset());
785  EXPECT_EQ(index_of_second_e_in_what_we_hear, best_match_.target_offset());
786  // The match includes the spaces after the final character,
787  // which is why (kBlockSize - 1) is added to the expected best size.
788  EXPECT_EQ((strlen("ear is fear") * kBlockSize) + (kBlockSize - 1),
789            best_match_.size());
790}
791
792TEST_F(BlockHashTest, BestMatchReachesEndOfDictionary) {
793  // Hash the "i" in "fear itself"
794  uint32_t hash_value = RollingHash<kBlockSize>::Hash(
795      &search_to_end_string[index_of_i_in_itself]);
796  dh_->FindBestMatch(hash_value,
797                     &search_to_end_string[index_of_i_in_itself],
798                     search_to_end_string,
799                     strlen(search_to_end_string),
800                     &best_match_);
801  EXPECT_EQ(index_of_space_before_itself, best_match_.source_offset());
802  EXPECT_EQ(index_of_space_in_eat_itself, best_match_.target_offset());
803  EXPECT_EQ(strlen(" itself") * kBlockSize, best_match_.size());
804}
805
806TEST_F(BlockHashTest, BestMatchReachesStartOfDictionary) {
807  // Hash the "i" in "fear itself"
808  uint32_t hash_value = RollingHash<kBlockSize>::Hash(
809      &search_to_beginning_string[index_of_o_in_online]);
810  dh_->FindBestMatch(hash_value,
811                     &search_to_beginning_string[index_of_o_in_online],
812                     search_to_beginning_string,
813                     strlen(search_to_beginning_string),
814                     &best_match_);
815  EXPECT_EQ(0, best_match_.source_offset());  // beginning of dictionary
816  EXPECT_EQ(index_of_t_in_use_the, best_match_.target_offset());
817  // The match includes the spaces after the final character,
818  // which is why (kBlockSize - 1) is added to the expected best size.
819  EXPECT_EQ((strlen("The onl") * kBlockSize) + (kBlockSize - 1),
820            best_match_.size());
821}
822
823TEST_F(BlockHashTest, BestMatchWithManyMatches) {
824  BlockHash many_matches_hash(sample_text_many_matches,
825                              strlen(sample_text_many_matches),
826                              0);
827  EXPECT_TRUE(many_matches_hash.Init(true));
828  // Hash the "   a" at the beginning of the search string "ababc"
829  uint32_t hash_value =
830      RollingHash<kBlockSize>::Hash(search_string_many_matches);
831  many_matches_hash.FindBestMatch(hash_value,
832                                  search_string_many_matches,
833                                  search_string_many_matches,
834                                  strlen(search_string_many_matches),
835                                  &best_match_);
836  EXPECT_EQ(index_of_ababc, best_match_.source_offset());
837  EXPECT_EQ(0, best_match_.target_offset());
838  EXPECT_EQ(strlen(search_string_many_matches), best_match_.size());
839}
840
841TEST_F(BlockHashTest, HashCollisionFindsNoMatch) {
842  char* collision_search_string = new char[strlen(search_string) + 1];
843  memcpy(collision_search_string, search_string, strlen(search_string) + 1);
844  char* fearsome_location = &collision_search_string[index_of_f_in_fearsome];
845
846  // Tweak the collision string so that it has the same hash value
847  // but different text.  The last four characters of the search string
848  // should be "   f", and the bytes given below have the same hash value
849  // as those characters.
850  CHECK_GE(kBlockSize, 4);
851  fearsome_location[kBlockSize - 4] = 0x84;
852  fearsome_location[kBlockSize - 3] = 0xF1;
853  fearsome_location[kBlockSize - 2] = 0x51;
854  fearsome_location[kBlockSize - 1] = 0x00;
855  EXPECT_EQ(hashed_f, RollingHash<kBlockSize>::Hash(fearsome_location));
856  EXPECT_NE(0, memcmp(&search_string[index_of_f_in_fearsome],
857                      fearsome_location,
858                      kBlockSize));
859  // No match should be found this time.
860  dh_->FindBestMatch(hashed_f,
861      fearsome_location,
862      collision_search_string,
863      strlen(search_string),  // since collision_search_string has embedded \0
864      &best_match_);
865  EXPECT_EQ(-1, best_match_.source_offset());
866  EXPECT_EQ(-1, best_match_.target_offset());
867  EXPECT_EQ(0U, best_match_.size());
868  delete[] collision_search_string;
869}
870
871// If the footprint passed to FindBestMatch does not actually match
872// the search string, it should not find any matches.
873TEST_F(BlockHashTest, WrongFootprintFindsNoMatch) {
874  dh_->FindBestMatch(hashed_e,  // Using hashed value of "e" instead of "f"!
875                     &search_string[index_of_f_in_fearsome],
876                     search_string,
877                     strlen(search_string),
878                     &best_match_);
879  EXPECT_EQ(-1, best_match_.source_offset());
880  EXPECT_EQ(-1, best_match_.target_offset());
881  EXPECT_EQ(0U, best_match_.size());
882}
883
884// Use a dictionary containing 1M copies of the letter 'Q',
885// and target data that also contains 1M Qs.  If FindBestMatch
886// is not throttled to find a maximum number of matches, this
887// will take a very long time -- several seconds at least.
888// If this test appears to hang, it is because the throttling code
889// (see BlockHash::kMaxMatchesToCheck for details) is not working.
890TEST_F(BlockHashTest, SearchStringFindsTooManyMatches) {
891  const int kTestSize = 1 << 20;  // 1M
892  char* huge_dictionary = new char[kTestSize];
893  memset(huge_dictionary, 'Q', kTestSize);
894  BlockHash huge_bh(huge_dictionary, kTestSize, 0);
895  EXPECT_TRUE(huge_bh.Init(/* populate_hash_table = */ true));
896  char* huge_target = new char[kTestSize];
897  memset(huge_target, 'Q', kTestSize);
898  CycleTimer timer;
899  timer.Start();
900  huge_bh.FindBestMatch(hashed_all_Qs,
901                        huge_target + (kTestSize / 2),  // middle of target
902                        huge_target,
903                        kTestSize,
904                        &best_match_);
905  timer.Stop();
906  double elapsed_time_in_us = static_cast<double>(timer.GetInUsec());
907  std::cout << "Time to search for best match with 1M matches: "
908            << elapsed_time_in_us << " us" << std::endl;
909  // All blocks match the candidate block.  FindBestMatch should have checked
910  // a certain number of matches before giving up.  The best match
911  // should include at least half the source and target, since the candidate
912  // block was in the middle of the target data.
913  EXPECT_GT((kTestSize / 2), best_match_.source_offset());
914  EXPECT_GT((kTestSize / 2), best_match_.target_offset());
915  EXPECT_LT(static_cast<size_t>(kTestSize / 2), best_match_.size());
916  EXPECT_GT(5000000, elapsed_time_in_us);  // < 5 seconds
917#ifdef NDEBUG
918  EXPECT_GT(1000000, elapsed_time_in_us);  // < 1 second
919#endif  // NDEBUG
920  delete[] huge_target;
921  delete[] huge_dictionary;
922}
923
924#ifdef GTEST_HAS_DEATH_TEST
925TEST_F(BlockHashDeathTest, AddTooManyBlocks) {
926  for (int i = 0; i < StringLengthAsInt(sample_text_without_spaces); ++i) {
927    th_->AddOneIndexHash(i * kBlockSize, hashed_e);
928  }
929  // Didn't expect another block to be added
930  EXPECT_DEBUG_DEATH(th_->AddOneIndexHash(StringLengthAsInt(sample_text),
931                                          hashed_e),
932                     "AddBlock");
933}
934#endif  // GTEST_HAS_DEATH_TEST
935
936}  //  namespace open_vcdiff
937