1// Copyright 2008 Google Inc.
2// Author: Lincoln Smith
3//
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// You may obtain a copy of the License at
7//
8//      http://www.apache.org/licenses/LICENSE-2.0
9//
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15
16#include <config.h>
17#include "vcdiffengine.h"
18#include <string.h>  // memset, strlen
19#include <algorithm>
20#include <string>
21#include "addrcache.h"
22#include "blockhash.h"
23#include "encodetable.h"
24#include "google/output_string.h"
25#include "rolling_hash.h"
26#include "testing.h"
27#include "varint_bigendian.h"
28#include "vcdiff_defs.h"
29
30namespace open_vcdiff {
31
32namespace {
33
34class VCDiffEngineTestBase : public testing::Test {
35 protected:
36  typedef std::string string;
37
38  // Some common definitions and helper functions used in the various tests
39  // for VCDiffEngine.
40  static const int kBlockSize = VCDiffEngine::kMinimumMatchSize;
41
42  VCDiffEngineTestBase() : interleaved_(false),
43                           diff_output_string_(&diff_),
44                           verify_position_(0),
45                           saved_total_size_position_(0),
46                           saved_delta_encoding_position_(0),
47                           saved_section_sizes_position_(0),
48                           data_bytes_(0),
49                           instruction_bytes_(0),
50                           address_bytes_(0) { }
51
52  virtual ~VCDiffEngineTestBase() { }
53
54  virtual void TearDown() {
55  }
56
57  // Copy string_without_spaces into newly allocated result buffer,
58  // but pad its contents with space characters so that every character
59  // in string_without_spaces corresponds to (block_size - 1)
60  // spaces in the result, followed by that character.
61  // For example:
62  // If string_without_spaces begins "The only thing"... and block_size is 4,
63  // then 3 space characters will be inserted
64  // between each letter in the result, as follows:
65  // "   T   h   e       o   n   l   y       t   h   i   n   g"...
66  // This makes testing simpler, because finding a block_size-byte match
67  // between the dictionary and target only depends on the
68  // trailing letter in each block.
69  // If no_initial_padding is true, then the first letter will not have
70  // spaces added in front of it.
71  static void MakeEachLetterABlock(const char* string_without_spaces,
72                                   const char** result,
73                                   int block_size,
74                                   bool no_initial_padding) {
75    const size_t length_without_spaces = strlen(string_without_spaces);
76    char* padded_text = new char[(block_size * length_without_spaces) + 1];
77    memset(padded_text, ' ', block_size * length_without_spaces);
78    char* padded_text_ptr = padded_text;
79    if (!no_initial_padding) {
80      padded_text_ptr += block_size - 1;
81    }
82    for (size_t i = 0; i < length_without_spaces; ++i) {
83      *padded_text_ptr = string_without_spaces[i];
84      padded_text_ptr += block_size;
85    }
86    *(padded_text_ptr - block_size + 1)  = '\0';
87    *result = padded_text;
88  }
89
90  // These functions iterate through the decoded output and expect
91  // simple elements: bytes or variable-length integers.
92  void ExpectByte(char byte) {
93    EXPECT_GT(diff_.size(), verify_position_);
94    EXPECT_EQ(byte, diff_[verify_position_]);
95    ++verify_position_;
96  }
97
98  size_t ExpectVarint(int32_t expected_value) {
99    EXPECT_GT(diff_.size(), verify_position_);
100    const char* const original_position = &diff_[verify_position_];
101    const char* new_position = original_position;
102    const size_t expected_length = VarintBE<int32_t>::Length(expected_value);
103    int32_t parsed_value = VarintBE<int32_t>::Parse(diff_.data() + diff_.size(),
104                                                    &new_position);
105    EXPECT_LE(0, parsed_value);
106    size_t parsed_length = new_position - original_position;
107    EXPECT_EQ(expected_value, parsed_value);
108    EXPECT_EQ(expected_length, parsed_length);
109    verify_position_ += parsed_length;
110    return parsed_length;
111  }
112
113  size_t ExpectSize(size_t size) {
114    return ExpectVarint(static_cast<int32_t>(size));
115  }
116
117  size_t ExpectStringLength(const char* s) {
118    return ExpectSize(strlen(s));
119  }
120
121  void SkipVarint() {
122    EXPECT_GT(diff_.size(), verify_position_);
123    const char* const original_position = &diff_[verify_position_];
124    const char* new_position = original_position;
125    VarintBE<int32_t>::Parse(diff_.data() + diff_.size(), &new_position);
126    size_t parsed_length = new_position - original_position;
127    verify_position_ += parsed_length;
128  }
129
130  void ExpectDataByte(char byte) {
131    ExpectByte(byte);
132    if (interleaved_) {
133      ++instruction_bytes_;
134    } else {
135      ++data_bytes_;
136    }
137  }
138
139  void ExpectDataString(const char *expected_string) {
140    const char* ptr = expected_string;
141    while (*ptr) {
142      ExpectDataByte(*ptr);
143      ++ptr;
144    }
145  }
146
147  void ExpectDataStringWithBlockSpacing(const char *expected_string,
148                                        bool trailing_spaces) {
149    const char* ptr = expected_string;
150    while (*ptr) {
151      for (int i = 0; i < (kBlockSize - 1); ++i) {
152        ExpectDataByte(' ');
153      }
154      ExpectDataByte(*ptr);
155      ++ptr;
156    }
157    if (trailing_spaces) {
158      for (int i = 0; i < (kBlockSize - 1); ++i) {
159        ExpectDataByte(' ');
160      }
161    }
162  }
163
164  void ExpectInstructionByte(char byte) {
165    ExpectByte(byte);
166    ++instruction_bytes_;
167  }
168
169  void ExpectInstructionVarint(int32_t value) {
170    instruction_bytes_ += ExpectVarint(value);
171  }
172
173  void ExpectAddressByte(char byte) {
174    ExpectByte(byte);
175    if (interleaved_) {
176      ++instruction_bytes_;
177    } else {
178      ++address_bytes_;
179    }
180  }
181
182  void ExpectAddressVarint(int32_t value) {
183    if (interleaved_) {
184      instruction_bytes_ += ExpectVarint(value);
185    } else {
186      address_bytes_ += ExpectVarint(value);
187    }
188  }
189
190  // The following functions leverage the fact that the encoder uses
191  // the default code table and cache sizes.  They are able to search for
192  // instructions of a particular size.  The logic for mapping from
193  // instruction type, mode, and size to opcode value is very different here
194  // from the logic used in encodetable.{h,cc}, so hopefully
195  // this version will help validate that the other is correct.
196  // This version uses conditional statements, while encodetable.h
197  // looks up values in a mapping table.
198  void ExpectAddress(int32_t address, int copy_mode) {
199    if (default_cache_.WriteAddressAsVarintForMode(copy_mode)) {
200      ExpectAddressVarint(address);
201    } else {
202      ExpectAddressByte(address);
203    }
204  }
205
206  void ExpectAddInstruction(int size) {
207    if (size <= 18) {
208      ExpectInstructionByte(0x01 + size);
209    } else {
210      ExpectInstructionByte(0x01);
211      ExpectInstructionVarint(size);
212    }
213  }
214
215  void ExpectCopyInstruction(int size, int mode) {
216    if ((size >= 4) && (size <= 16)) {
217      ExpectInstructionByte(0x10 + (0x10 * mode) + size);
218    } else {
219      ExpectInstructionByte(0x13 + (0x10 * mode));
220      ExpectInstructionVarint(size);
221    }
222  }
223
224  bool ExpectAddCopyInstruction(int add_size, int copy_size, int copy_mode) {
225    if (!default_cache_.IsSameMode(copy_mode) &&
226        (add_size <= 4) &&
227        (copy_size >= 4) &&
228        (copy_size <= 6)) {
229      ExpectInstructionByte(0x9C +
230                            (0x0C * copy_mode) +
231                            (0x03 * add_size) +
232                            copy_size);
233      return true;
234    } else if (default_cache_.IsSameMode(copy_mode) &&
235               (add_size <= 4) &&
236               (copy_size == 4)) {
237      ExpectInstructionByte(0xD2 + (0x04 * copy_mode) + add_size);
238      return true;
239    } else {
240      ExpectAddInstruction(add_size);
241      return false;
242    }
243  }
244
245  void ExpectAddInstructionForStringLength(const char* s) {
246    ExpectAddInstruction(static_cast<int>(strlen(s)));
247  }
248
249  // Call this function before beginning to iterate through the diff string
250  // using the Expect... functions.
251  // text must be NULL-terminated.
252  void VerifyHeaderForDictionaryAndTargetText(const char* dictionary,
253                                              const char* target_text) {
254    ExpectByte(0x01);  // Win_Indicator: VCD_SOURCE (dictionary)
255    ExpectStringLength(dictionary);
256    ExpectByte(0x00);  // Source segment position: start of dictionary
257    saved_total_size_position_ = verify_position_;
258    SkipVarint();  // Length of the delta encoding
259    saved_delta_encoding_position_ = verify_position_;
260    ExpectStringLength(target_text);
261    ExpectByte(0x00);  // Delta_indicator (no compression)
262    saved_section_sizes_position_ = verify_position_;
263    SkipVarint();  // length of data for ADDs and RUNs
264    SkipVarint();  // length of instructions section
265    SkipVarint();  // length of addresses for COPYs
266  }
267
268  // Call this function before beginning to iterating through the entire
269  // diff string using the Expect... functions.  It makes sure that the
270  // size totals in the window header match the number of bytes that
271  // were parsed.
272  void VerifySizes() {
273    EXPECT_EQ(verify_position_, diff_.size());
274    const size_t delta_encoding_size = verify_position_ -
275                                       saved_delta_encoding_position_;
276    verify_position_ = saved_total_size_position_;
277    ExpectSize(delta_encoding_size);
278    verify_position_ = saved_section_sizes_position_;
279    ExpectSize(data_bytes_);
280    ExpectSize(instruction_bytes_);
281    ExpectSize(address_bytes_);
282  }
283
284  bool interleaved_;
285  string diff_;
286  OutputString<string> diff_output_string_;
287  size_t verify_position_;
288  size_t saved_total_size_position_;
289  size_t saved_delta_encoding_position_;
290  size_t saved_section_sizes_position_;
291  size_t data_bytes_;
292  size_t instruction_bytes_;
293  size_t address_bytes_;
294  VCDiffAddressCache default_cache_;  // Used for finding mode values
295};
296
297class VCDiffEngineTest : public VCDiffEngineTestBase {
298 protected:
299  VCDiffEngineTest() :
300      engine_(dictionary_, strlen(dictionary_)) {
301    EXPECT_TRUE(const_cast<VCDiffEngine*>(&engine_)->Init());
302  }
303
304  virtual ~VCDiffEngineTest() { }
305
306
307  static void SetUpTestCase() {
308    MakeEachLetterABlock(dictionary_without_spaces_, &dictionary_,
309                         kBlockSize, false);
310    MakeEachLetterABlock(target_without_spaces_, &target_, kBlockSize, false);
311  }
312
313  static void TearDownTestCase() {
314    delete[] dictionary_;
315    delete[] target_;
316  }
317
318  void EncodeNothing(bool interleaved, bool target_matching) {
319    interleaved_ = interleaved;
320    VCDiffCodeTableWriter coder(interleaved);
321    coder.Init(engine_.dictionary_size());
322    engine_.Encode("", 0, target_matching, &diff_output_string_, &coder);
323    EXPECT_TRUE(diff_.empty());
324  }
325
326  // text must be NULL-terminated
327  void EncodeText(const char* text, bool interleaved, bool target_matching) {
328    interleaved_ = interleaved;
329    VCDiffCodeTableWriter coder(interleaved);
330    coder.Init(engine_.dictionary_size());
331    engine_.Encode(text,
332                   strlen(text),
333                   target_matching,
334                   &diff_output_string_,
335                   &coder);
336  }
337
338  void Encode(bool interleaved, bool target_matching) {
339    EncodeText(target_, interleaved, target_matching);
340    VerifyHeader();
341  }
342
343  void VerifyHeader() {
344    VerifyHeaderForDictionaryAndTargetText(dictionary_, target_);
345  }
346
347  static const char dictionary_without_spaces_[];
348  static const char target_without_spaces_[];
349
350  static const char* dictionary_;
351  static const char* target_;
352
353  const VCDiffEngine engine_;
354};
355
356#ifdef GTEST_HAS_DEATH_TEST
357typedef VCDiffEngineTest VCDiffEngineDeathTest;
358#endif  // GTEST_HAS_DEATH_TEST
359
360const char VCDiffEngineTest::dictionary_without_spaces_[] =
361    "The only thing we have to fear is fear itself";
362
363const char VCDiffEngineTest::target_without_spaces_[] =
364    "What we hear is fearsome";
365
366const char* VCDiffEngineTest::dictionary_ = NULL;
367const char* VCDiffEngineTest::target_ = NULL;
368
369#ifdef GTEST_HAS_DEATH_TEST
370TEST_F(VCDiffEngineDeathTest, InitCalledTwice) {
371  EXPECT_DEBUG_DEATH(EXPECT_FALSE(const_cast<VCDiffEngine*>(&engine_)->Init()),
372                     "twice");
373}
374#endif  // GTEST_HAS_DEATH_TEST
375
376TEST_F(VCDiffEngineTest, EngineEncodeNothing) {
377  EncodeNothing(/* interleaved = */ false, /* target matching = */ false);
378}
379
380TEST_F(VCDiffEngineTest, EngineEncodeNothingInterleaved) {
381  EncodeNothing(/* interleaved = */ true, /* target matching = */ false);
382}
383
384TEST_F(VCDiffEngineTest, EngineEncodeNothingTarget) {
385  EncodeNothing(/* interleaved = */ false, /* target matching = */ true);
386}
387
388TEST_F(VCDiffEngineTest, EngineEncodeNothingTargetInterleaved) {
389  EncodeNothing(/* interleaved = */ true, /* target matching = */ true);
390}
391
392TEST_F(VCDiffEngineTest, EngineEncodeSmallerThanOneBlock) {
393  const char* small_text = "  ";
394  EncodeText(small_text,
395             /* interleaved = */ false,
396             /* target matching = */ false);
397  VerifyHeaderForDictionaryAndTargetText(dictionary_, small_text);
398  // Data for ADDs
399  ExpectDataString(small_text);
400  // Instructions and sizes
401  ExpectAddInstructionForStringLength(small_text);
402}
403
404TEST_F(VCDiffEngineTest, EngineEncodeSmallerThanOneBlockInterleaved) {
405  const char* small_text = "  ";
406  EncodeText(small_text,
407             /* interleaved = */ true,
408             /* target matching = */ false);
409  VerifyHeaderForDictionaryAndTargetText(dictionary_, small_text);
410  // Interleaved section
411  ExpectAddInstructionForStringLength(small_text);
412  ExpectDataString(small_text);
413}
414
415TEST_F(VCDiffEngineTest, EngineEncodeSampleText) {
416  Encode(/* interleaved = */ false, /* target matching = */ false);
417  // Data for ADDs
418  ExpectDataStringWithBlockSpacing("W", false);
419  ExpectDataByte('t');
420  ExpectDataByte('s');
421  ExpectDataByte('m');
422  // Instructions and sizes
423  if (!ExpectAddCopyInstruction(kBlockSize, (3 * kBlockSize) - 1,
424                                VCD_SELF_MODE)) {
425    ExpectCopyInstruction((3 * kBlockSize) - 1, VCD_SELF_MODE);
426  }
427  ExpectAddInstruction(1);
428  ExpectCopyInstruction((6 * kBlockSize) - 1, VCD_SELF_MODE);
429  ExpectCopyInstruction(11 * kBlockSize,
430                        default_cache_.FirstNearMode());
431  if (!ExpectAddCopyInstruction(1, (2 * kBlockSize) - 1, VCD_SELF_MODE)) {
432    ExpectCopyInstruction((2 * kBlockSize) - 1, VCD_SELF_MODE);
433  }
434  if (!ExpectAddCopyInstruction(1, kBlockSize, VCD_SELF_MODE)) {
435    ExpectCopyInstruction(kBlockSize, VCD_SELF_MODE);
436  }
437  // Addresses for COPY
438  ExpectAddressVarint(18 * kBlockSize);  // "ha"
439  ExpectAddressVarint(14 * kBlockSize);  // " we h"
440  ExpectAddressVarint((9 * kBlockSize) + (kBlockSize - 1));  // "ear is fear"
441  ExpectAddressVarint(4 * kBlockSize);  // "o" from "The only"
442  ExpectAddressVarint(2 * kBlockSize);  // "e" from "The only"
443  VerifySizes();
444}
445
446TEST_F(VCDiffEngineTest, EngineEncodeSampleTextInterleaved) {
447  Encode(/* interleaved = */ true, /* target matching = */ false);
448  // Interleaved section
449  if (!ExpectAddCopyInstruction(kBlockSize, (3 * kBlockSize) - 1,
450                                VCD_SELF_MODE)) {
451    ExpectDataStringWithBlockSpacing("W", false);
452    ExpectCopyInstruction((3 * kBlockSize) - 1, VCD_SELF_MODE);
453  } else {
454    ExpectDataStringWithBlockSpacing("W", false);
455  }
456  ExpectAddressVarint(18 * kBlockSize);  // "ha"
457  ExpectAddInstruction(1);
458  ExpectDataByte('t');
459  ExpectCopyInstruction((6 * kBlockSize) - 1, VCD_SELF_MODE);
460  ExpectAddressVarint(14 * kBlockSize);  // " we h"
461  ExpectCopyInstruction(11 * kBlockSize,
462                        default_cache_.FirstNearMode());
463  ExpectAddressVarint((9 * kBlockSize) + (kBlockSize - 1));  // "ear is fear"
464  if (!ExpectAddCopyInstruction(1, (2 * kBlockSize) - 1, VCD_SELF_MODE)) {
465    ExpectDataByte('s');
466    ExpectCopyInstruction((2 * kBlockSize) - 1, VCD_SELF_MODE);
467  } else {
468    ExpectDataByte('s');
469  }
470  ExpectAddressVarint(4 * kBlockSize);  // "o" from "The only"
471  if (!ExpectAddCopyInstruction(1, kBlockSize, VCD_SELF_MODE)) {
472    ExpectDataByte('m');
473    ExpectCopyInstruction(kBlockSize, VCD_SELF_MODE);
474  } else {
475    ExpectDataByte('m');
476  }
477  ExpectAddressVarint(2 * kBlockSize);  // "e" from "The only"
478  VerifySizes();
479}
480
481TEST_F(VCDiffEngineTest, EngineEncodeSampleTextWithTargetMatching) {
482  Encode(/* interleaved = */ false, /* target matching = */ true);
483  // Data for ADDs
484  ExpectDataStringWithBlockSpacing("W", false);
485  ExpectDataByte('t');
486  ExpectDataByte('s');
487  ExpectDataByte('m');
488  // Instructions and sizes
489  if (!ExpectAddCopyInstruction(kBlockSize, (3 * kBlockSize) - 1,
490                                VCD_SELF_MODE)) {
491    ExpectCopyInstruction((3 * kBlockSize) - 1, VCD_SELF_MODE);
492  }
493  ExpectAddInstruction(1);
494  ExpectCopyInstruction((6 * kBlockSize) - 1, VCD_SELF_MODE);
495  ExpectCopyInstruction(11 * kBlockSize,
496                        default_cache_.FirstNearMode());
497  if (!ExpectAddCopyInstruction(1, (2 * kBlockSize) - 1, VCD_SELF_MODE)) {
498    ExpectCopyInstruction((2 * kBlockSize) - 1, VCD_SELF_MODE);
499  }
500  if (!ExpectAddCopyInstruction(1, kBlockSize, VCD_SELF_MODE)) {
501    ExpectCopyInstruction(kBlockSize, VCD_SELF_MODE);
502  }
503  // Addresses for COPY
504  ExpectAddressVarint(18 * kBlockSize);  // "ha"
505  ExpectAddressVarint(14 * kBlockSize);  // " we h"
506  ExpectAddressVarint((9 * kBlockSize) + (kBlockSize - 1));  // "ear is fear"
507  ExpectAddressVarint(4 * kBlockSize);  // "o" from "The only"
508  ExpectAddressVarint(2 * kBlockSize);  // "e" from "The only"
509  VerifySizes();
510}
511
512TEST_F(VCDiffEngineTest, EngineEncodeSampleTextInterleavedWithTargetMatching) {
513  Encode(/* interleaved = */ true, /* target matching = */ true);
514  // Interleaved section
515  if (!ExpectAddCopyInstruction(kBlockSize, (3 * kBlockSize) - 1,
516                                VCD_SELF_MODE)) {
517    ExpectDataStringWithBlockSpacing("W", false);
518    ExpectCopyInstruction((3 * kBlockSize) - 1, VCD_SELF_MODE);
519  } else {
520    ExpectDataStringWithBlockSpacing("W", false);
521  }
522  ExpectAddressVarint(18 * kBlockSize);  // "ha"
523  ExpectAddInstruction(1);
524  ExpectDataByte('t');
525  ExpectCopyInstruction((6 * kBlockSize) - 1, VCD_SELF_MODE);
526  ExpectAddressVarint(14 * kBlockSize);  // " we h"
527  ExpectCopyInstruction(11 * kBlockSize,
528                        default_cache_.FirstNearMode());
529  ExpectAddressVarint((9 * kBlockSize) + (kBlockSize - 1));  // "ear is fear"
530  if (!ExpectAddCopyInstruction(1, (2 * kBlockSize) - 1, VCD_SELF_MODE)) {
531    ExpectDataByte('s');
532    ExpectCopyInstruction((2 * kBlockSize) - 1, VCD_SELF_MODE);
533  } else {
534    ExpectDataByte('s');
535  }
536  ExpectAddressVarint(4 * kBlockSize);  // "o" from "The only"
537  if (!ExpectAddCopyInstruction(1, kBlockSize, VCD_SELF_MODE)) {
538    ExpectDataByte('m');
539    ExpectCopyInstruction(kBlockSize, VCD_SELF_MODE);
540  } else {
541    ExpectDataByte('m');
542  }
543  ExpectAddressVarint(2 * kBlockSize);  // "e" from "The only"
544  VerifySizes();
545}
546
547// This test case takes a dictionary containing several instances of the string
548// "weasel", and a target string which is identical to the dictionary
549// except that all instances of "weasel" have been replaced with the string
550// "moon-pie".  It tests that COPY instructions are generated for all
551// boilerplate text (that is, the text between the "moon-pie" instances in
552// the target) and, if target matching is enabled, that each instance of
553// "moon-pie" (except the first one) is encoded using a COPY instruction
554// rather than an ADD.
555class WeaselsToMoonpiesTest : public VCDiffEngineTestBase {
556 protected:
557  // kCompressibleTestBlockSize:
558  // The size of the block to create for each letter in the
559  // dictionary and search string for the "compressible text" test.
560  // See MakeEachLetterABlock, below.
561  // If we use kCompressibleTestBlockSize = kBlockSize, then the
562  // encoder will find one match per unique letter in the HTML text.
563  // There are too many examples of "<" in the text for the encoder
564  // to iterate through them all, and some matches are not found.
565  // If we use kCompressibleTextBlockSize = 1, then the boilerplate
566  // text between "weasel" strings in the dictionary and "moon-pie"
567  // strings in the target may not be long enough to be found by
568  // the encoder's block-hash algorithm.  A good value, that will give
569  // reproducible results across all block sizes, will be somewhere
570  // in between these extremes.
571  static const int kCompressibleTestBlockSize = kBlockSize / 4;
572  static const int kTrailingSpaces = kCompressibleTestBlockSize - 1;
573
574  WeaselsToMoonpiesTest() :
575      engine_(dictionary_, strlen(dictionary_)),
576      match_index_(0),
577      search_dictionary_(dictionary_, strlen(dictionary_)),
578      copied_moonpie_address_(0) {
579    EXPECT_TRUE(const_cast<VCDiffEngine*>(&engine_)->Init());
580    weasel_positions_[0] = 0;
581    after_weasel_[0] = 0;
582    moonpie_positions_[0] = 0;
583    after_moonpie_[0] = 0;
584  }
585
586  virtual ~WeaselsToMoonpiesTest() { }
587
588  static void SetUpTestCase() {
589    MakeEachLetterABlock(dictionary_without_spaces_,
590                         &dictionary_,
591                         kCompressibleTestBlockSize,
592                         false);
593    MakeEachLetterABlock(target_without_spaces_,
594                         &target_,
595                         kCompressibleTestBlockSize,
596                         false);
597    MakeEachLetterABlock(weasel_text_without_spaces_,
598                         &weasel_text_,
599                         kCompressibleTestBlockSize,
600                         true);
601    MakeEachLetterABlock(moonpie_text_without_spaces_,
602                         &moonpie_text_,
603                         kCompressibleTestBlockSize,
604                         true);
605  }
606
607  static void TearDownTestCase() {
608    delete[] dictionary_;
609    delete[] target_;
610    delete[] weasel_text_;
611    delete[] moonpie_text_;
612  }
613
614  // text must be NULL-terminated
615  void EncodeText(const char* text, bool interleaved, bool target_matching) {
616    interleaved_ = interleaved;
617    VCDiffCodeTableWriter coder(interleaved);
618    coder.Init(engine_.dictionary_size());
619    engine_.Encode(text,
620                   strlen(text),
621                   target_matching,
622                   &diff_output_string_,
623                   &coder);
624  }
625
626  void Encode(bool interleaved, bool target_matching) {
627    EncodeText(target_, interleaved, target_matching);
628    VerifyHeader();
629  }
630
631  void VerifyHeader() {
632    VerifyHeaderForDictionaryAndTargetText(dictionary_, target_);
633  }
634
635  void ExpectCopyForSize(size_t size, int mode) {
636    ExpectCopyInstruction(static_cast<int>(size), mode);
637  }
638
639  void ExpectAddForSize(size_t size) {
640    ExpectAddInstruction(static_cast<int>(size));
641  }
642
643  void ExpectAddressVarintForSize(size_t value) {
644    ExpectAddressVarint(static_cast<int32_t>(value));
645  }
646
647  void FindNextMoonpie(bool include_trailing_spaces) {
648    ++match_index_;
649    SetCurrentWeaselPosition(search_dictionary_.find(weasel_text_,
650                                                     AfterLastWeasel()));
651    if (CurrentWeaselPosition() == string::npos) {
652      SetCurrentMoonpiePosition(string::npos);
653    } else {
654      SetCurrentAfterWeaselPosition(CurrentWeaselPosition()
655                                        + strlen(weasel_text_)
656                                        + (include_trailing_spaces ?
657                                            kTrailingSpaces : 0));
658      SetCurrentMoonpiePosition(AfterLastMoonpie()
659                                    + CurrentBoilerplateLength());
660      SetCurrentAfterMoonpiePosition(CurrentMoonpiePosition()
661                                        + strlen(moonpie_text_)
662                                        + (include_trailing_spaces ?
663                                            kTrailingSpaces : 0));
664    }
665  }
666  bool NoMoreMoonpies() const {
667    return CurrentMoonpiePosition() == string::npos;
668  }
669  size_t CurrentWeaselPosition() const {
670    return weasel_positions_[match_index_];
671  }
672  size_t LastWeaselPosition() const {
673    return weasel_positions_[match_index_ - 1];
674  }
675  size_t CurrentMoonpiePosition() const {
676    return moonpie_positions_[match_index_];
677  }
678  size_t LastMoonpiePosition() const {
679    return moonpie_positions_[match_index_ - 1];
680  }
681  size_t AfterLastWeasel() const {
682    CHECK_GE(match_index_, 1);
683    return after_weasel_[match_index_ - 1];
684  }
685  size_t AfterPreviousWeasel() const {
686    CHECK_GE(match_index_, 2);
687    return after_weasel_[match_index_ - 2];
688  }
689  size_t AfterLastMoonpie() const {
690    CHECK_GE(match_index_, 1);
691    return after_moonpie_[match_index_ - 1];
692  }
693  size_t AfterPreviousMoonpie() const {
694    CHECK_GE(match_index_, 2);
695    return after_moonpie_[match_index_ - 2];
696  }
697
698  void SetCurrentWeaselPosition(size_t value) {
699    weasel_positions_[match_index_] = value;
700  }
701  void SetCurrentAfterWeaselPosition(size_t value) {
702    after_weasel_[match_index_] = value;
703  }
704  void SetCurrentMoonpiePosition(size_t value) {
705    moonpie_positions_[match_index_] = value;
706  }
707  void SetCurrentAfterMoonpiePosition(size_t value) {
708    after_moonpie_[match_index_] = value;
709  }
710
711  // Find the length of the text in between the "weasel" strings in the
712  // compressible dictionary, which is the same as the text between
713  // the "moon-pie" strings in the compressible target.
714  size_t CurrentBoilerplateLength() const {
715    CHECK_GE(match_index_, 1);
716    return CurrentWeaselPosition() - AfterLastWeasel();
717  }
718  size_t DistanceFromLastWeasel() const {
719    CHECK_GE(match_index_, 1);
720    return CurrentWeaselPosition() - LastWeaselPosition();
721  }
722  size_t DistanceFromLastMoonpie() const {
723    CHECK_GE(match_index_, 1);
724    return CurrentMoonpiePosition() - LastMoonpiePosition();
725  }
726  size_t DistanceBetweenLastTwoWeasels() const {
727    CHECK_GE(match_index_, 2);
728    return AfterLastWeasel() - AfterPreviousWeasel();
729  }
730  size_t DistanceBetweenLastTwoMoonpies() const {
731    CHECK_GE(match_index_, 2);
732    return AfterLastMoonpie() - AfterPreviousMoonpie();
733  }
734
735  int32_t FindBoilerplateAddressForCopyMode(int copy_mode) const;
736  int UpdateCopyModeForMoonpie(int copy_mode) const;
737  int32_t FindMoonpieAddressForCopyMode(int copy_mode) const;
738
739  void CopyBoilerplateAndAddMoonpie(int copy_mode);
740  void CopyBoilerplateAndCopyMoonpie(int copy_mode, int moonpie_copy_mode);
741
742  static const char dictionary_without_spaces_[];
743  static const char target_without_spaces_[];
744  static const char weasel_text_without_spaces_[];
745  static const char moonpie_text_without_spaces_[];
746
747  static const char* dictionary_;
748  static const char* target_;
749  static const char* weasel_text_;
750  static const char* moonpie_text_;
751
752  const VCDiffEngine engine_;
753  size_t weasel_positions_[128];
754  size_t after_weasel_[128];
755  size_t moonpie_positions_[128];
756  size_t after_moonpie_[128];
757  int match_index_;
758  string search_dictionary_;
759  size_t copied_moonpie_address_;
760};
761
762// Care is taken in the formulation of the dictionary
763// to ensure that the surrounding letters do not match; for example,
764// there are not two instances of the string "weasels".  Otherwise,
765// the matching behavior would not be as predictable.
766const char WeaselsToMoonpiesTest::dictionary_without_spaces_[] =
767  "<html>\n"
768  "<head>\n"
769  "<meta content=\"text/html; charset=ISO-8859-1\"\n"
770  "http-equiv=\"content-type\">\n"
771  "<title>All about weasels</title>\n"
772  "</head>\n"
773  "<!-- You will notice that the word \"weasel\" may be replaced"
774  " with something else -->\n"
775  "<body>\n"
776  "<h1>All about the weasel: highly compressible HTML text</h1>"
777  "<ul>\n"
778  "<li>Don\'t look a gift weasel in its mouth.</li>\n"
779  "<li>This item makes sure the next occurrence is found.</li>\n"
780  "<li>Don\'t count your weasel, before it\'s hatched.</li>\n"
781  "</ul>\n"
782  "<br>\n"
783  "</body>\n"
784  "</html>\n";
785
786const char WeaselsToMoonpiesTest::target_without_spaces_[] =
787  "<html>\n"
788  "<head>\n"
789  "<meta content=\"text/html; charset=ISO-8859-1\"\n"
790  "http-equiv=\"content-type\">\n"
791  "<title>All about moon-pies</title>\n"
792  "</head>\n"
793  "<!-- You will notice that the word \"moon-pie\" may be replaced"
794  " with something else -->\n"
795  "<body>\n"
796  "<h1>All about the moon-pie: highly compressible HTML text</h1>"
797  "<ul>\n"
798  "<li>Don\'t look a gift moon-pie in its mouth.</li>\n"
799  "<li>This item makes sure the next occurrence is found.</li>\n"
800  "<li>Don\'t count your moon-pie, before it\'s hatched.</li>\n"
801  "</ul>\n"
802  "<br>\n"
803  "</body>\n"
804  "</html>\n";
805
806const char WeaselsToMoonpiesTest::weasel_text_without_spaces_[] = "weasel";
807const char WeaselsToMoonpiesTest::moonpie_text_without_spaces_[] = "moon-pie";
808
809const char* WeaselsToMoonpiesTest::dictionary_ = NULL;
810const char* WeaselsToMoonpiesTest::target_ = NULL;
811const char* WeaselsToMoonpiesTest::weasel_text_ = NULL;
812const char* WeaselsToMoonpiesTest::moonpie_text_ = NULL;
813
814int32_t WeaselsToMoonpiesTest::FindBoilerplateAddressForCopyMode(
815    int copy_mode) const {
816  size_t copy_address = 0;
817  if (default_cache_.IsSelfMode(copy_mode)) {
818    copy_address = AfterLastWeasel();
819  } else if (default_cache_.IsNearMode(copy_mode)) {
820    copy_address = DistanceBetweenLastTwoWeasels();
821  } else if (default_cache_.IsSameMode(copy_mode)) {
822    copy_address = AfterLastWeasel() % 256;
823  }
824  return static_cast<int32_t>(copy_address);
825}
826
827int WeaselsToMoonpiesTest::UpdateCopyModeForMoonpie(int copy_mode) const {
828  if (copy_mode == default_cache_.FirstSameMode()) {
829    return default_cache_.FirstSameMode()
830        + static_cast<int>((copied_moonpie_address_ / 256) % 3);
831  } else {
832    return copy_mode;
833  }
834}
835
836int32_t WeaselsToMoonpiesTest::FindMoonpieAddressForCopyMode(
837    int copy_mode) const {
838  size_t copy_address = 0;
839  if (default_cache_.IsHereMode(copy_mode)) {
840    copy_address = DistanceFromLastMoonpie();
841  } else if (default_cache_.IsNearMode(copy_mode)) {
842    copy_address = DistanceBetweenLastTwoMoonpies() - kTrailingSpaces;
843  } else if (default_cache_.IsSameMode(copy_mode)) {
844    copy_address = copied_moonpie_address_ % 256;
845  }
846  return static_cast<int32_t>(copy_address);
847}
848
849// Expect one dictionary instance of "weasel" to be replaced with "moon-pie"
850// in the encoding.
851void WeaselsToMoonpiesTest::CopyBoilerplateAndAddMoonpie(int copy_mode) {
852  EXPECT_FALSE(NoMoreMoonpies());
853  ExpectCopyForSize(CurrentBoilerplateLength(), copy_mode);
854  ExpectAddress(FindBoilerplateAddressForCopyMode(copy_mode), copy_mode);
855  ExpectAddInstructionForStringLength(moonpie_text_);
856  ExpectDataString(moonpie_text_);
857}
858
859// Expect one dictionary instance of "weasel" to be replaced with "moon-pie"
860// in the encoding.  The "moon-pie" text will be copied from the previously
861// encoded target.
862void WeaselsToMoonpiesTest::CopyBoilerplateAndCopyMoonpie(
863    int copy_mode,
864    int moonpie_copy_mode) {
865  EXPECT_FALSE(NoMoreMoonpies());
866  ExpectCopyForSize(CurrentBoilerplateLength(), copy_mode);
867  ExpectAddress(FindBoilerplateAddressForCopyMode(copy_mode), copy_mode);
868  moonpie_copy_mode = UpdateCopyModeForMoonpie(moonpie_copy_mode);
869  ExpectCopyForSize(strlen(moonpie_text_) + kTrailingSpaces, moonpie_copy_mode);
870  ExpectAddress(FindMoonpieAddressForCopyMode(moonpie_copy_mode),
871                moonpie_copy_mode);
872  copied_moonpie_address_ = strlen(dictionary_) + LastMoonpiePosition();
873}
874
875TEST_F(WeaselsToMoonpiesTest, EngineEncodeCompressibleNoTargetMatching) {
876  Encode(/* interleaved = */ true, /* target matching = */ false);
877  FindNextMoonpie(false);
878  // Expect all five "weasel"s to be replaced with "moon-pie"s
879  CopyBoilerplateAndAddMoonpie(default_cache_.FirstSameMode());
880  FindNextMoonpie(false);
881  CopyBoilerplateAndAddMoonpie(VCD_SELF_MODE);
882  FindNextMoonpie(false);
883  CopyBoilerplateAndAddMoonpie(default_cache_.FirstNearMode() + 1);
884  FindNextMoonpie(false);
885  CopyBoilerplateAndAddMoonpie(default_cache_.FirstNearMode() + 2);
886  FindNextMoonpie(false);
887  CopyBoilerplateAndAddMoonpie(default_cache_.FirstNearMode() + 3);
888  FindNextMoonpie(false);
889  EXPECT_TRUE(NoMoreMoonpies());
890  ExpectCopyForSize(strlen(dictionary_) - AfterLastWeasel(),
891                    default_cache_.FirstNearMode());
892  ExpectAddressVarintForSize(DistanceBetweenLastTwoWeasels());
893  VerifySizes();
894}
895
896TEST_F(WeaselsToMoonpiesTest, EngineEncodeCompressibleWithTargetMatching) {
897  Encode(/* interleaved = */ true, /* target matching = */ true);
898  // Expect all five "weasel"s to be replaced with "moon-pie"s.
899  // Every "moon-pie" after the first one should be copied from the
900  // previously encoded target text.
901  FindNextMoonpie(false);
902  CopyBoilerplateAndAddMoonpie(default_cache_.FirstSameMode());
903  FindNextMoonpie(true);
904  CopyBoilerplateAndCopyMoonpie(VCD_SELF_MODE, VCD_HERE_MODE);
905  FindNextMoonpie(true);
906  CopyBoilerplateAndCopyMoonpie(default_cache_.FirstNearMode() + 1,
907                                default_cache_.FirstSameMode());
908  FindNextMoonpie(true);
909  CopyBoilerplateAndCopyMoonpie(default_cache_.FirstNearMode() + 3,
910                                VCD_HERE_MODE);
911  FindNextMoonpie(true);
912  CopyBoilerplateAndCopyMoonpie(default_cache_.FirstNearMode() + 1,
913                                default_cache_.FirstSameMode());
914  FindNextMoonpie(true);
915  EXPECT_TRUE(NoMoreMoonpies());
916  ExpectCopyForSize(strlen(dictionary_) - AfterLastWeasel(),
917                    default_cache_.FirstNearMode() + 3);
918  ExpectAddressVarintForSize(DistanceBetweenLastTwoWeasels());
919  VerifySizes();
920}
921
922}  //  anonymous namespace
923}  //  namespace open-vcdiff
924