1// Copyright 2008 Google Inc.
2// Author: Lincoln Smith
3//
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// You may obtain a copy of the License at
7//
8//      http://www.apache.org/licenses/LICENSE-2.0
9//
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15
16#include <config.h>
17#include "google/vcencoder.h"
18#include <stdlib.h>  // free, posix_memalign
19#include <string.h>  // memcpy
20#include <algorithm>
21#include <string>
22#include <vector>
23#include "blockhash.h"
24#include "checksum.h"
25#include "testing.h"
26#include "varint_bigendian.h"
27#include "google/vcdecoder.h"
28#include "vcdiff_defs.h"
29
30#ifdef HAVE_EXT_ROPE
31#include <ext/rope>
32#include "output_string_crope.h"
33using __gnu_cxx::crope;
34#endif  // HAVE_EXT_ROPE
35
36#ifdef HAVE_MALLOC_H
37#include <malloc.h>
38#endif  // HAVE_MALLOC_H
39
40#ifdef HAVE_SYS_MMAN_H
41#define _XOPEN_SOURCE 600  // posix_memalign
42#include <sys/mman.h>  // mprotect
43#endif  // HAVE_SYS_MMAN_H
44
45#ifdef HAVE_UNISTD_H
46#include <unistd.h>  // getpagesize
47#endif  // HAVE_UNISTD_H
48
49namespace open_vcdiff {
50namespace {
51
52static const size_t kFileHeaderSize = sizeof(DeltaFileHeader);
53
54// This is to check the maximum possible encoding size
55// if using a single ADD instruction, so assume that the
56// dictionary size, the length of the ADD data, the size
57// of the target window, and the length of the delta window
58// are all two-byte Varints, that is, 128 <= length < 4096.
59// This figure includes three extra bytes for a zero-sized
60// ADD instruction with a two-byte Varint explicit size.
61// Any additional COPY & ADD instructions must reduce
62// the length of the encoding from this maximum.
63static const size_t kWindowHeaderSize = 21;
64
65class VerifyEncodedBytesTest : public testing::Test {
66 public:
67  typedef std::string string;
68
69  VerifyEncodedBytesTest() : delta_index_(0) { }
70  virtual ~VerifyEncodedBytesTest() { }
71
72  void ExpectByte(unsigned char b) {
73    EXPECT_EQ(b, static_cast<unsigned char>(delta_[delta_index_]));
74    ++delta_index_;
75  }
76
77  void ExpectString(const char* s) {
78    const size_t size = strlen(s);  // don't include terminating NULL char
79    EXPECT_EQ(string(s, size),
80              string(delta_data() + delta_index_, size));
81    delta_index_ += size;
82  }
83
84  void ExpectNoMoreBytes() {
85    EXPECT_EQ(delta_index_, delta_size());
86  }
87
88  void ExpectSize(size_t size) {
89    const char* delta_size_pos = &delta_[delta_index_];
90    EXPECT_EQ(size,
91              static_cast<size_t>(
92                  VarintBE<int32_t>::Parse(delta_data() + delta_size(),
93                                           &delta_size_pos)));
94    delta_index_ = delta_size_pos - delta_data();
95  }
96
97  void ExpectChecksum(VCDChecksum checksum) {
98    const char* delta_checksum_pos = &delta_[delta_index_];
99    EXPECT_EQ(checksum,
100              static_cast<VCDChecksum>(
101                  VarintBE<int64_t>::Parse(delta_data() + delta_size(),
102                                           &delta_checksum_pos)));
103    delta_index_ = delta_checksum_pos - delta_data();
104  }
105
106  const string& delta_as_const() const { return delta_; }
107  string* delta() { return &delta_; }
108
109  const char* delta_data() const { return delta_as_const().data(); }
110  size_t delta_size() const { return delta_as_const().size(); }
111
112 private:
113  string delta_;
114  size_t delta_index_;
115};
116
117class VCDiffEncoderTest : public VerifyEncodedBytesTest {
118 protected:
119  static const char kDictionary[];
120  static const char kTarget[];
121
122  VCDiffEncoderTest();
123  virtual ~VCDiffEncoderTest() { }
124
125  void TestWithFixedChunkSize(size_t chunk_size);
126  void TestWithEncodedChunkVector(size_t chunk_size);
127
128  HashedDictionary hashed_dictionary_;
129  VCDiffStreamingEncoder encoder_;
130  VCDiffStreamingDecoder decoder_;
131  VCDiffEncoder simple_encoder_;
132  VCDiffDecoder simple_decoder_;
133
134  string result_target_;
135};
136
137const char VCDiffEncoderTest::kDictionary[] =
138    "\"Just the place for a Snark!\" the Bellman cried,\n"
139    "As he landed his crew with care;\n"
140    "Supporting each man on the top of the tide\n"
141    "By a finger entwined in his hair.\n";
142
143const char VCDiffEncoderTest::kTarget[] =
144    "\"Just the place for a Snark! I have said it twice:\n"
145    "That alone should encourage the crew.\n"
146    "Just the place for a Snark! I have said it thrice:\n"
147    "What I tell you three times is true.\"\n";
148
149VCDiffEncoderTest::VCDiffEncoderTest()
150    : hashed_dictionary_(kDictionary, sizeof(kDictionary)),
151      encoder_(&hashed_dictionary_,
152               VCD_FORMAT_INTERLEAVED | VCD_FORMAT_CHECKSUM,
153               /* look_for_target_matches = */ true),
154      simple_encoder_(kDictionary, sizeof(kDictionary)) {
155  EXPECT_TRUE(hashed_dictionary_.Init());
156}
157
158TEST_F(VCDiffEncoderTest, EncodeBeforeStartEncoding) {
159  EXPECT_FALSE(encoder_.EncodeChunk(kTarget, strlen(kTarget), delta()));
160}
161
162TEST_F(VCDiffEncoderTest, FinishBeforeStartEncoding) {
163  EXPECT_FALSE(encoder_.FinishEncoding(delta()));
164}
165
166TEST_F(VCDiffEncoderTest, EncodeDecodeNothing) {
167  HashedDictionary nothing_dictionary("", 0);
168  EXPECT_TRUE(nothing_dictionary.Init());
169  VCDiffStreamingEncoder nothing_encoder(&nothing_dictionary,
170                                         VCD_STANDARD_FORMAT,
171                                         false);
172  EXPECT_TRUE(nothing_encoder.StartEncoding(delta()));
173  EXPECT_TRUE(nothing_encoder.FinishEncoding(delta()));
174  decoder_.StartDecoding("", 0);
175  EXPECT_TRUE(decoder_.DecodeChunk(delta_data(),
176                                   delta_size(),
177                                   &result_target_));
178  EXPECT_TRUE(decoder_.FinishDecoding());
179  EXPECT_TRUE(result_target_.empty());
180}
181
182// A NULL dictionary pointer is legal as long as the dictionary size is 0.
183TEST_F(VCDiffEncoderTest, EncodeDecodeNullDictionaryPtr) {
184  HashedDictionary null_dictionary(NULL, 0);
185  EXPECT_TRUE(null_dictionary.Init());
186  VCDiffStreamingEncoder null_encoder(&null_dictionary,
187                                      VCD_STANDARD_FORMAT,
188                                      false);
189  EXPECT_TRUE(null_encoder.StartEncoding(delta()));
190  EXPECT_TRUE(null_encoder.EncodeChunk(kTarget, strlen(kTarget), delta()));
191  EXPECT_TRUE(null_encoder.FinishEncoding(delta()));
192  EXPECT_GE(strlen(kTarget) + kFileHeaderSize + kWindowHeaderSize,
193            delta_size());
194  decoder_.StartDecoding(NULL, 0);
195  EXPECT_TRUE(decoder_.DecodeChunk(delta_data(),
196                                   delta_size(),
197                                   &result_target_));
198  EXPECT_TRUE(decoder_.FinishDecoding());
199  EXPECT_EQ(kTarget, result_target_);
200}
201
202TEST_F(VCDiffEncoderTest, EncodeDecodeSimple) {
203  EXPECT_TRUE(simple_encoder_.Encode(kTarget, strlen(kTarget), delta()));
204  EXPECT_GE(strlen(kTarget) + kFileHeaderSize + kWindowHeaderSize,
205            delta_size());
206  EXPECT_TRUE(simple_decoder_.Decode(kDictionary,
207                                     sizeof(kDictionary),
208                                     delta_as_const(),
209                                     &result_target_));
210  EXPECT_EQ(kTarget, result_target_);
211}
212
213TEST_F(VCDiffEncoderTest, EncodeDecodeInterleaved) {
214  simple_encoder_.SetFormatFlags(VCD_FORMAT_INTERLEAVED);
215  EXPECT_TRUE(simple_encoder_.Encode(kTarget, strlen(kTarget), delta()));
216  EXPECT_GE(strlen(kTarget) + kFileHeaderSize + kWindowHeaderSize,
217            delta_size());
218  EXPECT_TRUE(simple_decoder_.Decode(kDictionary,
219                                     sizeof(kDictionary),
220                                     delta_as_const(),
221                                     &result_target_));
222  EXPECT_EQ(kTarget, result_target_);
223}
224
225TEST_F(VCDiffEncoderTest, EncodeDecodeInterleavedChecksum) {
226  simple_encoder_.SetFormatFlags(VCD_FORMAT_INTERLEAVED | VCD_FORMAT_CHECKSUM);
227  EXPECT_TRUE(simple_encoder_.Encode(kTarget,
228                                     strlen(kTarget),
229                                     delta()));
230  EXPECT_GE(strlen(kTarget) + kFileHeaderSize + kWindowHeaderSize,
231            delta_size());
232  EXPECT_TRUE(simple_decoder_.Decode(kDictionary,
233                                     sizeof(kDictionary),
234                                     delta_as_const(),
235                                     &result_target_));
236  EXPECT_EQ(kTarget, result_target_);
237}
238
239TEST_F(VCDiffEncoderTest, EncodeDecodeSingleChunk) {
240  EXPECT_TRUE(encoder_.StartEncoding(delta()));
241  EXPECT_TRUE(encoder_.EncodeChunk(kTarget, strlen(kTarget), delta()));
242  EXPECT_TRUE(encoder_.FinishEncoding(delta()));
243  EXPECT_GE(strlen(kTarget) + kFileHeaderSize + kWindowHeaderSize,
244            delta_size());
245  decoder_.StartDecoding(kDictionary, sizeof(kDictionary));
246  EXPECT_TRUE(decoder_.DecodeChunk(delta_data(),
247                                   delta_size(),
248                                   &result_target_));
249  EXPECT_TRUE(decoder_.FinishDecoding());
250  EXPECT_EQ(kTarget, result_target_);
251}
252
253TEST_F(VCDiffEncoderTest, EncodeDecodeSeparate) {
254  string delta_start, delta_encode, delta_finish;
255  EXPECT_TRUE(encoder_.StartEncoding(&delta_start));
256  EXPECT_TRUE(encoder_.EncodeChunk(kTarget, strlen(kTarget), &delta_encode));
257  EXPECT_TRUE(encoder_.FinishEncoding(&delta_finish));
258  EXPECT_GE(strlen(kTarget) + kFileHeaderSize + kWindowHeaderSize,
259            delta_start.size() + delta_encode.size() + delta_finish.size());
260  decoder_.StartDecoding(kDictionary, sizeof(kDictionary));
261  EXPECT_TRUE(decoder_.DecodeChunk(delta_start.data(),
262                                   delta_start.size(),
263                                   &result_target_));
264  EXPECT_TRUE(decoder_.DecodeChunk(delta_encode.data(),
265                                   delta_encode.size(),
266                                   &result_target_));
267  EXPECT_TRUE(decoder_.DecodeChunk(delta_finish.data(),
268                                   delta_finish.size(),
269                                   &result_target_));
270  EXPECT_TRUE(decoder_.FinishDecoding());
271  EXPECT_EQ(kTarget, result_target_);
272}
273
274#ifdef HAVE_EXT_ROPE
275// Test that the crope class can be used in place of a string for encoding
276// and decoding.
277TEST_F(VCDiffEncoderTest, EncodeDecodeCrope) {
278  crope delta_crope, result_crope;
279  EXPECT_TRUE(encoder_.StartEncoding(&delta_crope));
280  EXPECT_TRUE(encoder_.EncodeChunk(kTarget, strlen(kTarget), &delta_crope));
281  EXPECT_TRUE(encoder_.FinishEncoding(&delta_crope));
282  EXPECT_GE(strlen(kTarget) + kFileHeaderSize + kWindowHeaderSize,
283            delta_crope.size());
284  decoder_.StartDecoding(kDictionary, sizeof(kDictionary));
285  // crope can't guarantee that its characters are contiguous, so the decoding
286  // has to be done byte-by-byte.
287  for (crope::const_iterator it = delta_crope.begin();
288       it != delta_crope.end(); it++) {
289    const char this_char = *it;
290    EXPECT_TRUE(decoder_.DecodeChunk(&this_char, 1, &result_crope));
291  }
292  EXPECT_TRUE(decoder_.FinishDecoding());
293  crope expected_target(kTarget);
294  EXPECT_EQ(expected_target, result_crope);
295}
296#endif  // HAVE_EXT_ROPE
297
298void VCDiffEncoderTest::TestWithFixedChunkSize(size_t chunk_size) {
299  delta()->clear();
300  EXPECT_TRUE(encoder_.StartEncoding(delta()));
301  for (size_t chunk_start_index = 0;
302       chunk_start_index < strlen(kTarget);
303       chunk_start_index += chunk_size) {
304    size_t this_chunk_size = chunk_size;
305    const size_t bytes_available = strlen(kTarget) - chunk_start_index;
306    if (this_chunk_size > bytes_available) {
307      this_chunk_size = bytes_available;
308    }
309    EXPECT_TRUE(encoder_.EncodeChunk(&kTarget[chunk_start_index],
310                                     this_chunk_size,
311                                     delta()));
312  }
313  EXPECT_TRUE(encoder_.FinishEncoding(delta()));
314  const size_t num_windows = (strlen(kTarget) / chunk_size) + 1;
315  const size_t size_of_windows =
316      strlen(kTarget) + (kWindowHeaderSize * num_windows);
317  EXPECT_GE(kFileHeaderSize + size_of_windows, delta_size());
318  result_target_.clear();
319  decoder_.StartDecoding(kDictionary, sizeof(kDictionary));
320  for (size_t chunk_start_index = 0;
321       chunk_start_index < delta_size();
322       chunk_start_index += chunk_size) {
323    size_t this_chunk_size = chunk_size;
324    const size_t bytes_available = delta_size() - chunk_start_index;
325    if (this_chunk_size > bytes_available) {
326      this_chunk_size = bytes_available;
327    }
328    EXPECT_TRUE(decoder_.DecodeChunk(delta_data() + chunk_start_index,
329                                     this_chunk_size,
330                                     &result_target_));
331  }
332  EXPECT_TRUE(decoder_.FinishDecoding());
333  EXPECT_EQ(kTarget, result_target_);
334}
335
336TEST_F(VCDiffEncoderTest, EncodeDecodeFixedChunkSizes) {
337  // These specific chunk sizes have failed in the past
338  TestWithFixedChunkSize(6);
339  TestWithFixedChunkSize(45);
340  TestWithFixedChunkSize(60);
341
342  // Now loop through all possible chunk sizes
343  for (size_t chunk_size = 1; chunk_size < strlen(kTarget); ++chunk_size) {
344    TestWithFixedChunkSize(chunk_size);
345  }
346}
347
348// If --allow_vcd_target=false is specified, the decoder will throw away some of
349// the internally-stored decoded target beyond the current window.  Try
350// different numbers of encoded window sizes to make sure that this behavior
351// does not affect the results.
352TEST_F(VCDiffEncoderTest, EncodeDecodeFixedChunkSizesNoVcdTarget) {
353  decoder_.SetAllowVcdTarget(false);
354  // Loop through all possible chunk sizes
355  for (size_t chunk_size = 1; chunk_size < strlen(kTarget); ++chunk_size) {
356    TestWithFixedChunkSize(chunk_size);
357  }
358}
359
360// Splits the text to be encoded into fixed-size chunks.  Encodes each
361// chunk and puts it into a vector of strings.  Then decodes each string
362// in the vector and appends the result into result_target_.
363void VCDiffEncoderTest::TestWithEncodedChunkVector(size_t chunk_size) {
364  std::vector<string> encoded_chunks;
365  string this_encoded_chunk;
366  size_t total_chunk_size = 0;
367  EXPECT_TRUE(encoder_.StartEncoding(&this_encoded_chunk));
368  encoded_chunks.push_back(this_encoded_chunk);
369  total_chunk_size += this_encoded_chunk.size();
370  for (size_t chunk_start_index = 0;
371       chunk_start_index < strlen(kTarget);
372       chunk_start_index += chunk_size) {
373    size_t this_chunk_size = chunk_size;
374    const size_t bytes_available = strlen(kTarget) - chunk_start_index;
375    if (this_chunk_size > bytes_available) {
376      this_chunk_size = bytes_available;
377    }
378    this_encoded_chunk.clear();
379    EXPECT_TRUE(encoder_.EncodeChunk(&kTarget[chunk_start_index],
380                                     this_chunk_size,
381                                     &this_encoded_chunk));
382    encoded_chunks.push_back(this_encoded_chunk);
383    total_chunk_size += this_encoded_chunk.size();
384  }
385  this_encoded_chunk.clear();
386  EXPECT_TRUE(encoder_.FinishEncoding(&this_encoded_chunk));
387  encoded_chunks.push_back(this_encoded_chunk);
388  total_chunk_size += this_encoded_chunk.size();
389  const size_t num_windows = (strlen(kTarget) / chunk_size) + 1;
390  const size_t size_of_windows =
391      strlen(kTarget) + (kWindowHeaderSize * num_windows);
392  EXPECT_GE(kFileHeaderSize + size_of_windows, total_chunk_size);
393  result_target_.clear();
394  decoder_.StartDecoding(kDictionary, sizeof(kDictionary));
395  for (std::vector<string>::iterator it = encoded_chunks.begin();
396       it != encoded_chunks.end(); ++it) {
397    EXPECT_TRUE(decoder_.DecodeChunk(it->data(), it->size(), &result_target_));
398  }
399  EXPECT_TRUE(decoder_.FinishDecoding());
400  EXPECT_EQ(kTarget, result_target_);
401}
402
403TEST_F(VCDiffEncoderTest, EncodeDecodeStreamOfChunks) {
404  // Loop through all possible chunk sizes
405  for (size_t chunk_size = 1; chunk_size < strlen(kTarget); ++chunk_size) {
406    TestWithEncodedChunkVector(chunk_size);
407  }
408}
409
410// Verify that HashedDictionary stores a copy of the dictionary text,
411// rather than just storing a pointer to it.  If the dictionary buffer
412// is overwritten after creating a HashedDictionary from it, it shouldn't
413// affect an encoder that uses that HashedDictionary.
414TEST_F(VCDiffEncoderTest, DictionaryBufferOverwritten) {
415  string dictionary_copy(kDictionary, sizeof(kDictionary));
416  HashedDictionary hd_copy(dictionary_copy.data(), dictionary_copy.size());
417  EXPECT_TRUE(hd_copy.Init());
418  VCDiffStreamingEncoder copy_encoder(&hd_copy,
419                                      VCD_FORMAT_INTERLEAVED
420                                          | VCD_FORMAT_CHECKSUM,
421                                      /* look_for_target_matches = */ true);
422  // Produce a reference version of the encoded text.
423  string delta_before;
424  EXPECT_TRUE(copy_encoder.StartEncoding(&delta_before));
425  EXPECT_TRUE(copy_encoder.EncodeChunk(kTarget,
426                                       strlen(kTarget),
427                                       &delta_before));
428  EXPECT_TRUE(copy_encoder.FinishEncoding(&delta_before));
429  EXPECT_GE(strlen(kTarget) + kFileHeaderSize + kWindowHeaderSize,
430            delta_before.size());
431
432  // Overwrite the dictionary text with all 'Q' characters.
433  dictionary_copy.replace(0,
434                          dictionary_copy.size(),
435                          dictionary_copy.size(),
436                          'Q');
437  // When the encoder is used on the same target text after overwriting
438  // the dictionary, it should produce the same encoded output.
439  string delta_after;
440  EXPECT_TRUE(copy_encoder.StartEncoding(&delta_after));
441  EXPECT_TRUE(copy_encoder.EncodeChunk(kTarget, strlen(kTarget), &delta_after));
442  EXPECT_TRUE(copy_encoder.FinishEncoding(&delta_after));
443  EXPECT_EQ(delta_before, delta_after);
444}
445
446// Binary data test part 1: The dictionary and target data should not
447// be treated as NULL-terminated.  An embedded NULL should be handled like
448// any other byte of data.
449TEST_F(VCDiffEncoderTest, DictionaryHasEmbeddedNULLs) {
450  const char embedded_null_dictionary_text[] =
451      { 0x00, 0xFF, 0xFE, 0xFD, 0x00, 0xFD, 0xFE, 0xFF, 0x00, 0x03 };
452  const char embedded_null_target[] =
453      { 0xFD, 0x00, 0xFD, 0xFE, 0x03, 0x00, 0x01, 0x00 };
454  CHECK_EQ(10, sizeof(embedded_null_dictionary_text));
455  CHECK_EQ(8, sizeof(embedded_null_target));
456  HashedDictionary embedded_null_dictionary(embedded_null_dictionary_text,
457      sizeof(embedded_null_dictionary_text));
458  EXPECT_TRUE(embedded_null_dictionary.Init());
459  VCDiffStreamingEncoder embedded_null_encoder(&embedded_null_dictionary,
460      VCD_FORMAT_INTERLEAVED | VCD_FORMAT_CHECKSUM,
461      /* look_for_target_matches = */ true);
462  EXPECT_TRUE(embedded_null_encoder.StartEncoding(delta()));
463  EXPECT_TRUE(embedded_null_encoder.EncodeChunk(embedded_null_target,
464                                                sizeof(embedded_null_target),
465                                                delta()));
466  EXPECT_TRUE(embedded_null_encoder.FinishEncoding(delta()));
467  decoder_.StartDecoding(embedded_null_dictionary_text,
468                         sizeof(embedded_null_dictionary_text));
469  EXPECT_TRUE(decoder_.DecodeChunk(delta_data(),
470                                   delta_size(),
471                                   &result_target_));
472  EXPECT_TRUE(decoder_.FinishDecoding());
473  EXPECT_EQ(sizeof(embedded_null_target), result_target_.size());
474  EXPECT_EQ(string(embedded_null_target,
475                   sizeof(embedded_null_target)),
476            result_target_);
477}
478
479// Binary data test part 2: An embedded CR or LF should be handled like
480// any other byte of data.  No text-processing of the data should occur.
481TEST_F(VCDiffEncoderTest, DictionaryHasEmbeddedNewlines) {
482  const char embedded_null_dictionary_text[] =
483      { 0x0C, 0xFF, 0xFE, 0x0C, 0x00, 0x0A, 0xFE, 0xFF, 0x00, 0x0A };
484  const char embedded_null_target[] =
485      { 0x0C, 0x00, 0x0A, 0xFE, 0x03, 0x00, 0x0A, 0x00 };
486  CHECK_EQ(10, sizeof(embedded_null_dictionary_text));
487  CHECK_EQ(8, sizeof(embedded_null_target));
488  HashedDictionary embedded_null_dictionary(embedded_null_dictionary_text,
489      sizeof(embedded_null_dictionary_text));
490  EXPECT_TRUE(embedded_null_dictionary.Init());
491  VCDiffStreamingEncoder embedded_null_encoder(&embedded_null_dictionary,
492      VCD_FORMAT_INTERLEAVED | VCD_FORMAT_CHECKSUM,
493      /* look_for_target_matches = */ true);
494  EXPECT_TRUE(embedded_null_encoder.StartEncoding(delta()));
495  EXPECT_TRUE(embedded_null_encoder.EncodeChunk(embedded_null_target,
496                                                sizeof(embedded_null_target),
497                                                delta()));
498  EXPECT_TRUE(embedded_null_encoder.FinishEncoding(delta()));
499  decoder_.StartDecoding(embedded_null_dictionary_text,
500                         sizeof(embedded_null_dictionary_text));
501  EXPECT_TRUE(decoder_.DecodeChunk(delta_data(),
502                                   delta_size(),
503                                   &result_target_));
504  EXPECT_TRUE(decoder_.FinishDecoding());
505  EXPECT_EQ(sizeof(embedded_null_target), result_target_.size());
506  EXPECT_EQ(string(embedded_null_target,
507                   sizeof(embedded_null_target)),
508            result_target_);
509}
510
511TEST_F(VCDiffEncoderTest, UsingWideCharacters) {
512  const wchar_t wchar_dictionary_text[] =
513      L"\"Just the place for a Snark!\" the Bellman cried,\n"
514      L"As he landed his crew with care;\n"
515      L"Supporting each man on the top of the tide\n"
516      L"By a finger entwined in his hair.\n";
517
518  const wchar_t wchar_target[] =
519      L"\"Just the place for a Snark! I have said it twice:\n"
520      L"That alone should encourage the crew.\n"
521      L"Just the place for a Snark! I have said it thrice:\n"
522      L"What I tell you three times is true.\"\n";
523
524  HashedDictionary wchar_dictionary((const char*) wchar_dictionary_text,
525                                    sizeof(wchar_dictionary_text));
526  EXPECT_TRUE(wchar_dictionary.Init());
527  VCDiffStreamingEncoder wchar_encoder(&wchar_dictionary,
528                                       VCD_FORMAT_INTERLEAVED
529                                           | VCD_FORMAT_CHECKSUM,
530                                       /* look_for_target_matches = */ false);
531  EXPECT_TRUE(wchar_encoder.StartEncoding(delta()));
532  EXPECT_TRUE(wchar_encoder.EncodeChunk((const char*) wchar_target,
533                                        sizeof(wchar_target),
534                                        delta()));
535  EXPECT_TRUE(wchar_encoder.FinishEncoding(delta()));
536  decoder_.StartDecoding((const char*) wchar_dictionary_text,
537                         sizeof(wchar_dictionary_text));
538  EXPECT_TRUE(decoder_.DecodeChunk(delta_data(),
539                                   delta_size(),
540                                   &result_target_));
541  EXPECT_TRUE(decoder_.FinishDecoding());
542  const wchar_t* result_as_wchar = (const wchar_t*) result_target_.data();
543  EXPECT_EQ(wcslen(wchar_target), wcslen(result_as_wchar));
544  EXPECT_EQ(0, wcscmp(wchar_target, result_as_wchar));
545}
546
547#if defined(HAVE_MPROTECT) && \
548   (defined(HAVE_MEMALIGN) || defined(HAVE_POSIX_MEMALIGN))
549// Bug 1220602: Make sure the encoder doesn't read past the end of the input
550// buffer.
551TEST_F(VCDiffEncoderTest, ShouldNotReadPastEndOfBuffer) {
552  const size_t target_size = strlen(kTarget);
553
554  // Allocate two memory pages.
555  const int page_size = getpagesize();
556  void* two_pages = NULL;
557#ifdef HAVE_POSIX_MEMALIGN
558  posix_memalign(&two_pages, page_size, 2 * page_size);
559#else  // !HAVE_POSIX_MEMALIGN
560  two_pages = memalign(page_size, 2 * page_size);
561#endif  // HAVE_POSIX_MEMALIGN
562  char* const first_page = reinterpret_cast<char*>(two_pages);
563  char* const second_page = first_page + page_size;
564
565  // Place the target string at the end of the first page.
566  char* const target_with_guard = second_page - target_size;
567  memcpy(target_with_guard, kTarget, target_size);
568
569  // Make the second page unreadable.
570  mprotect(second_page, page_size, PROT_NONE);
571
572  // Now perform the encode operation, which will cause a segmentation fault
573  // if it reads past the end of the buffer.
574  EXPECT_TRUE(encoder_.StartEncoding(delta()));
575  EXPECT_TRUE(encoder_.EncodeChunk(target_with_guard, target_size, delta()));
576  EXPECT_TRUE(encoder_.FinishEncoding(delta()));
577
578  // Undo the mprotect.
579  mprotect(second_page, page_size, PROT_READ|PROT_WRITE);
580  free(two_pages);
581}
582
583TEST_F(VCDiffEncoderTest, ShouldNotReadPastBeginningOfBuffer) {
584  const size_t target_size = strlen(kTarget);
585
586  // Allocate two memory pages.
587  const int page_size = getpagesize();
588  void* two_pages = NULL;
589#ifdef HAVE_POSIX_MEMALIGN
590  posix_memalign(&two_pages, page_size, 2 * page_size);
591#else  // !HAVE_POSIX_MEMALIGN
592  two_pages = memalign(page_size, 2 * page_size);
593#endif  // HAVE_POSIX_MEMALIGN
594  char* const first_page = reinterpret_cast<char*>(two_pages);
595  char* const second_page = first_page + page_size;
596
597  // Make the first page unreadable.
598  mprotect(first_page, page_size, PROT_NONE);
599
600  // Place the target string at the beginning of the second page.
601  char* const target_with_guard = second_page;
602  memcpy(target_with_guard, kTarget, target_size);
603
604  // Now perform the encode operation, which will cause a segmentation fault
605  // if it reads past the beginning of the buffer.
606  EXPECT_TRUE(encoder_.StartEncoding(delta()));
607  EXPECT_TRUE(encoder_.EncodeChunk(target_with_guard, target_size, delta()));
608  EXPECT_TRUE(encoder_.FinishEncoding(delta()));
609
610  // Undo the mprotect.
611  mprotect(first_page, page_size, PROT_READ|PROT_WRITE);
612  free(two_pages);
613}
614#endif  // HAVE_MPROTECT && (HAVE_MEMALIGN || HAVE_POSIX_MEMALIGN)
615
616class VCDiffMatchCountTest : public VerifyEncodedBytesTest {
617 protected:
618  virtual ~VCDiffMatchCountTest() { }
619
620  void ExpectMatch(size_t match_size) {
621    if (match_size >= expected_match_counts_.size()) {
622      // Be generous to avoid resizing again
623      expected_match_counts_.resize(match_size * 2, 0);
624    }
625    ++expected_match_counts_[match_size];
626  }
627
628  void VerifyMatchCounts() {
629    EXPECT_TRUE(std::equal(expected_match_counts_.begin(),
630                           expected_match_counts_.end(),
631                           actual_match_counts_.begin()));
632  }
633
634  std::vector<int> expected_match_counts_;
635  std::vector<int> actual_match_counts_;
636};
637
638class VCDiffHTML1Test : public VCDiffMatchCountTest {
639 protected:
640  static const char kDictionary[];
641  static const char kTarget[];
642  static const char kRedundantTarget[];
643
644  VCDiffHTML1Test();
645  virtual ~VCDiffHTML1Test() { }
646
647  void SimpleEncode();
648  void StreamingEncode();
649
650  HashedDictionary hashed_dictionary_;
651  VCDiffStreamingEncoder encoder_;
652  VCDiffStreamingDecoder decoder_;
653  VCDiffEncoder simple_encoder_;
654  VCDiffDecoder simple_decoder_;
655
656  string result_target_;
657};
658
659const char VCDiffHTML1Test::kDictionary[] =
660    "<html><font color=red>This part from the dict</font><br>";
661
662const char VCDiffHTML1Test::kTarget[] =
663    "<html><font color=red>This part from the dict</font><br>\n"
664    "And this part is not...</html>";
665
666const char VCDiffHTML1Test::kRedundantTarget[] =
667    "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
668    "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
669    "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
670    "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA";  // 256
671
672VCDiffHTML1Test::VCDiffHTML1Test()
673    : hashed_dictionary_(kDictionary, sizeof(kDictionary)),
674      encoder_(&hashed_dictionary_,
675               VCD_FORMAT_INTERLEAVED | VCD_FORMAT_CHECKSUM,
676               /* look_for_target_matches = */ true),
677      simple_encoder_(kDictionary, sizeof(kDictionary)) {
678  EXPECT_TRUE(hashed_dictionary_.Init());
679}
680
681void VCDiffHTML1Test::SimpleEncode() {
682  EXPECT_TRUE(simple_encoder_.Encode(kTarget, strlen(kTarget), delta()));
683  EXPECT_GE(strlen(kTarget) + kFileHeaderSize + kWindowHeaderSize,
684            delta_size());
685  EXPECT_TRUE(simple_decoder_.Decode(kDictionary,
686                                     sizeof(kDictionary),
687                                     delta_as_const(),
688                                     &result_target_));
689  EXPECT_EQ(kTarget, result_target_);
690}
691
692void VCDiffHTML1Test::StreamingEncode() {
693  EXPECT_TRUE(encoder_.StartEncoding(delta()));
694  EXPECT_TRUE(encoder_.EncodeChunk(kTarget, strlen(kTarget), delta()));
695  EXPECT_TRUE(encoder_.FinishEncoding(delta()));
696}
697
698TEST_F(VCDiffHTML1Test, CheckOutputOfSimpleEncoder) {
699  SimpleEncode();
700  // These values do not depend on the block size used for encoding
701  ExpectByte(0xD6);  // 'V' | 0x80
702  ExpectByte(0xC3);  // 'C' | 0x80
703  ExpectByte(0xC4);  // 'D' | 0x80
704  ExpectByte(0x00);  // Simple encoder never uses interleaved format
705  ExpectByte(0x00);  // Hdr_Indicator
706  ExpectByte(VCD_SOURCE);  // Win_Indicator: VCD_SOURCE (dictionary)
707  ExpectByte(sizeof(kDictionary));  // Dictionary length
708  ExpectByte(0x00);  // Source segment position: start of dictionary
709  if (BlockHash::kBlockSize < 16) {
710    // A medium block size will catch the "his part " match.
711    ExpectByte(0x22);  // Length of the delta encoding
712    ExpectSize(strlen(kTarget));  // Size of the target window
713    ExpectByte(0x00);  // Delta_indicator (no compression)
714    ExpectByte(0x16);  // Length of the data section
715    ExpectByte(0x05);  // Length of the instructions section
716    ExpectByte(0x02);  // Length of the address section
717    // Data section
718    ExpectString("\nAnd t");      // Data for 1st ADD
719    ExpectString("is not...</html>");  // Data for 2nd ADD
720    // Instructions section
721    ExpectByte(0x73);  // COPY size 0 mode VCD_SAME(0)
722    ExpectByte(0x38);  // COPY size (56)
723    ExpectByte(0x07);  // ADD size 6
724    ExpectByte(0x19);  // COPY size 9 mode VCD_SELF
725    ExpectByte(0x11);  // ADD size 16
726    // Address section
727    ExpectByte(0x00);  // COPY address (0) mode VCD_SAME(0)
728    ExpectByte(0x17);  // COPY address (23) mode VCD_SELF
729  } else if (BlockHash::kBlockSize <= 56) {
730    // Any block size up to 56 will catch the matching prefix string.
731    ExpectByte(0x29);  // Length of the delta encoding
732    ExpectSize(strlen(kTarget));  // Size of the target window
733    ExpectByte(0x00);  // Delta_indicator (no compression)
734    ExpectByte(0x1F);  // Length of the data section
735    ExpectByte(0x04);  // Length of the instructions section
736    ExpectByte(0x01);  // Length of the address section
737    ExpectString("\nAnd this part is not...</html>");  // Data for ADD
738    // Instructions section
739    ExpectByte(0x73);  // COPY size 0 mode VCD_SAME(0)
740    ExpectByte(0x38);  // COPY size (56)
741    ExpectByte(0x01);  // ADD size 0
742    ExpectByte(0x1F);  // Size of ADD (31)
743    // Address section
744    ExpectByte(0x00);  // COPY address (0) mode VCD_SAME(0)
745  } else {
746    // The matching string is 56 characters long, and the block size is
747    // 64 or greater, so no match should be found.
748    ExpectSize(strlen(kTarget) + 7);  // Delta encoding len
749    ExpectSize(strlen(kTarget));  // Size of the target window
750    ExpectByte(0x00);  // Delta_indicator (no compression)
751    ExpectSize(strlen(kTarget));  // Length of the data section
752    ExpectByte(0x02);  // Length of the instructions section
753    ExpectByte(0x00);  // Length of the address section
754    // Data section
755    ExpectString(kTarget);
756    ExpectByte(0x01);  // ADD size 0
757    ExpectSize(strlen(kTarget));
758  }
759  ExpectNoMoreBytes();
760}
761
762TEST_F(VCDiffHTML1Test, MatchCounts) {
763  StreamingEncode();
764  encoder_.GetMatchCounts(&actual_match_counts_);
765  if (BlockHash::kBlockSize < 16) {
766    // A medium block size will catch the "his part " match.
767    ExpectMatch(56);
768    ExpectMatch(9);
769  } else if (BlockHash::kBlockSize <= 56) {
770    // Any block size up to 56 will catch the matching prefix string.
771    ExpectMatch(56);
772  }
773  VerifyMatchCounts();
774}
775
776TEST_F(VCDiffHTML1Test, SimpleEncoderPerformsTargetMatching) {
777  EXPECT_TRUE(simple_encoder_.Encode(kRedundantTarget,
778                                     strlen(kRedundantTarget),
779                                     delta()));
780  EXPECT_GE(strlen(kRedundantTarget) + kFileHeaderSize + kWindowHeaderSize,
781            delta_size());
782  EXPECT_TRUE(simple_decoder_.Decode(kDictionary,
783                                     sizeof(kDictionary),
784                                     delta_as_const(),
785                                     &result_target_));
786  EXPECT_EQ(kRedundantTarget, result_target_);
787  // These values do not depend on the block size used for encoding
788  ExpectByte(0xD6);  // 'V' | 0x80
789  ExpectByte(0xC3);  // 'C' | 0x80
790  ExpectByte(0xC4);  // 'D' | 0x80
791  ExpectByte(0x00);  // Simple encoder never uses interleaved format
792  ExpectByte(0x00);  // Hdr_Indicator
793  ExpectByte(VCD_SOURCE);  // Win_Indicator: VCD_SOURCE (dictionary)
794  ExpectByte(sizeof(kDictionary));  // Dictionary length
795  ExpectByte(0x00);  // Source segment position: start of dictionary
796  ExpectByte(0x0C);  // Length of the delta encoding
797  ExpectSize(strlen(kRedundantTarget));  // Size of the target window
798  ExpectByte(0x00);  // Delta_indicator (no compression)
799  ExpectByte(0x01);  // Length of the data section
800  ExpectByte(0x04);  // Length of the instructions section
801  ExpectByte(0x01);  // Length of the address section
802  // Data section
803  ExpectString("A");      // Data for ADD
804  // Instructions section
805  ExpectByte(0x02);  // ADD size 1
806  ExpectByte(0x23);  // COPY size 0 mode VCD_HERE
807  ExpectSize(strlen(kRedundantTarget) - 1);  // COPY size 255
808  // Address section
809  ExpectByte(0x01);  // COPY address (1) mode VCD_HERE
810  ExpectNoMoreBytes();
811}
812
813TEST_F(VCDiffHTML1Test, SimpleEncoderWithoutTargetMatching) {
814  simple_encoder_.SetTargetMatching(false);
815  EXPECT_TRUE(simple_encoder_.Encode(kRedundantTarget,
816                                     strlen(kRedundantTarget),
817                                     delta()));
818  EXPECT_GE(strlen(kRedundantTarget) + kFileHeaderSize + kWindowHeaderSize,
819            delta_size());
820  EXPECT_TRUE(simple_decoder_.Decode(kDictionary,
821                                     sizeof(kDictionary),
822                                     delta_as_const(),
823                                     &result_target_));
824  EXPECT_EQ(kRedundantTarget, result_target_);
825  // These values do not depend on the block size used for encoding
826  ExpectByte(0xD6);  // 'V' | 0x80
827  ExpectByte(0xC3);  // 'C' | 0x80
828  ExpectByte(0xC4);  // 'D' | 0x80
829  ExpectByte(0x00);  // Simple encoder never uses interleaved format
830  ExpectByte(0x00);  // Hdr_Indicator
831  ExpectByte(VCD_SOURCE);  // Win_Indicator: VCD_SOURCE (dictionary)
832  ExpectByte(sizeof(kDictionary));  // Dictionary length
833  ExpectByte(0x00);  // Source segment position: start of dictionary
834  ExpectSize(strlen(kRedundantTarget) + 0x0A);  // Length of the delta encoding
835  ExpectSize(strlen(kRedundantTarget));  // Size of the target window
836  ExpectByte(0x00);  // Delta_indicator (no compression)
837  ExpectSize(strlen(kRedundantTarget));  // Length of the data section
838  ExpectByte(0x03);  // Length of the instructions section
839  ExpectByte(0x00);  // Length of the address section
840  // Data section
841  ExpectString(kRedundantTarget);      // Data for ADD
842  // Instructions section
843  ExpectByte(0x01);  // ADD size 0
844  ExpectSize(strlen(kRedundantTarget));  // ADD size
845  // Address section empty
846  ExpectNoMoreBytes();
847}
848
849#ifdef GTEST_HAS_DEATH_TEST
850typedef VCDiffHTML1Test VCDiffHTML1DeathTest;
851
852TEST_F(VCDiffHTML1DeathTest, NullMatchCounts) {
853  EXPECT_DEBUG_DEATH(encoder_.GetMatchCounts(NULL), "GetMatchCounts");
854}
855#endif  // GTEST_HAS_DEATH_TEST
856
857class VCDiffHTML2Test : public VCDiffMatchCountTest {
858 protected:
859  static const char kDictionary[];
860  static const char kTarget[];
861
862  VCDiffHTML2Test();
863  virtual ~VCDiffHTML2Test() { }
864
865  void SimpleEncode();
866  void StreamingEncode();
867
868  HashedDictionary hashed_dictionary_;
869  VCDiffStreamingEncoder encoder_;
870  VCDiffStreamingDecoder decoder_;
871  VCDiffEncoder simple_encoder_;
872  VCDiffDecoder simple_decoder_;
873
874  string result_target_;
875};
876
877const char VCDiffHTML2Test::kDictionary[] = "10\nThis is a test";
878
879const char VCDiffHTML2Test::kTarget[] = "This is a test!!!\n";
880
881VCDiffHTML2Test::VCDiffHTML2Test()
882    : hashed_dictionary_(kDictionary, sizeof(kDictionary)),
883      encoder_(&hashed_dictionary_,
884               VCD_FORMAT_INTERLEAVED | VCD_FORMAT_CHECKSUM,
885               /* look_for_target_matches = */ true),
886      simple_encoder_(kDictionary, sizeof(kDictionary)) {
887  EXPECT_TRUE(hashed_dictionary_.Init());
888}
889
890void VCDiffHTML2Test::SimpleEncode() {
891  EXPECT_TRUE(simple_encoder_.Encode(kTarget, strlen(kTarget), delta()));
892  EXPECT_GE(strlen(kTarget) + kFileHeaderSize + kWindowHeaderSize,
893            delta_size());
894  EXPECT_TRUE(simple_decoder_.Decode(kDictionary,
895                                     sizeof(kDictionary),
896                                     delta_as_const(),
897                                     &result_target_));
898  EXPECT_EQ(kTarget, result_target_);
899}
900
901void VCDiffHTML2Test::StreamingEncode() {
902  EXPECT_TRUE(encoder_.StartEncoding(delta()));
903  EXPECT_TRUE(encoder_.EncodeChunk(kTarget, strlen(kTarget), delta()));
904  EXPECT_GE(strlen(kTarget) + kFileHeaderSize + kWindowHeaderSize,
905            delta_size());
906  EXPECT_TRUE(simple_decoder_.Decode(kDictionary,
907                                     sizeof(kDictionary),
908                                     delta_as_const(),
909                                     &result_target_));
910  EXPECT_EQ(kTarget, result_target_);
911}
912
913TEST_F(VCDiffHTML2Test, VerifyOutputOfSimpleEncoder) {
914  SimpleEncode();
915  // These values do not depend on the block size used for encoding
916  ExpectByte(0xD6);  // 'V' | 0x80
917  ExpectByte(0xC3);  // 'C' | 0x80
918  ExpectByte(0xC4);  // 'D' | 0x80
919  ExpectByte(0x00);  // Simple encoder never uses interleaved format
920  ExpectByte(0x00);  // Hdr_Indicator
921  ExpectByte(VCD_SOURCE);  // Win_Indicator: VCD_SOURCE (dictionary)
922  ExpectByte(sizeof(kDictionary));  // Dictionary length
923  ExpectByte(0x00);  // Source segment position: start of dictionary
924  if (BlockHash::kBlockSize <= 8) {
925    ExpectByte(12);  // Length of the delta encoding
926    ExpectSize(strlen(kTarget));  // Size of the target window
927    ExpectByte(0x00);  // Delta_indicator (no compression)
928    ExpectByte(0x04);  // Length of the data section
929    ExpectByte(0x02);  // Length of the instructions section
930    ExpectByte(0x01);  // Length of the address section
931    ExpectByte('!');
932    ExpectByte('!');
933    ExpectByte('!');
934    ExpectByte('\n');
935    ExpectByte(0x1E);  // COPY size 14 mode VCD_SELF
936    ExpectByte(0x05);  // ADD size 4
937    ExpectByte(0x03);  // COPY address (3) mode VCD_SELF
938  } else {
939    // Larger block sizes will not catch any matches.
940    ExpectSize(strlen(kTarget) + 7);  // Delta encoding len
941    ExpectSize(strlen(kTarget));  // Size of the target window
942    ExpectByte(0x00);  // Delta_indicator (no compression)
943    ExpectSize(strlen(kTarget));  // Length of the data section
944    ExpectByte(0x02);  // Length of the instructions section
945    ExpectByte(0x00);  // Length of the address section
946    // Data section
947    ExpectString(kTarget);
948    ExpectByte(0x01);  // ADD size 0
949    ExpectSize(strlen(kTarget));
950  }
951  ExpectNoMoreBytes();
952}
953
954TEST_F(VCDiffHTML2Test, VerifyOutputWithChecksum) {
955  StreamingEncode();
956  const VCDChecksum html2_checksum = ComputeAdler32(kTarget, strlen(kTarget));
957  CHECK_EQ(5, VarintBE<int64_t>::Length(html2_checksum));
958  // These values do not depend on the block size used for encoding
959  ExpectByte(0xD6);  // 'V' | 0x80
960  ExpectByte(0xC3);  // 'C' | 0x80
961  ExpectByte(0xC4);  // 'D' | 0x80
962  ExpectByte('S');  // Format extensions
963  ExpectByte(0x00);  // Hdr_Indicator
964  ExpectByte(VCD_SOURCE | VCD_CHECKSUM);  // Win_Indicator
965  ExpectByte(sizeof(kDictionary));  // Dictionary length
966  ExpectByte(0x00);  // Source segment position: start of dictionary
967  if (BlockHash::kBlockSize <= 8) {
968    ExpectByte(17);  // Length of the delta encoding
969    ExpectSize(strlen(kTarget));  // Size of the target window
970    ExpectByte(0x00);  // Delta_indicator (no compression)
971    ExpectByte(0x00);  // Length of the data section
972    ExpectByte(0x07);  // Length of the instructions section
973    ExpectByte(0x00);  // Length of the address section
974    ExpectChecksum(html2_checksum);
975    ExpectByte(0x1E);  // COPY size 14 mode VCD_SELF
976    ExpectByte(0x03);  // COPY address (3) mode VCD_SELF
977    ExpectByte(0x05);  // ADD size 4
978    ExpectByte('!');
979    ExpectByte('!');
980    ExpectByte('!');
981    ExpectByte('\n');
982  } else {
983    // Larger block sizes will not catch any matches.
984    ExpectSize(strlen(kTarget) + 12);  // Delta encoding len
985    ExpectSize(strlen(kTarget));  // Size of the target window
986    ExpectByte(0x00);  // Delta_indicator (no compression)
987    ExpectByte(0x00);  // Length of the data section
988    ExpectSize(0x02 + strlen(kTarget));  // Interleaved
989    ExpectByte(0x00);  // Length of the address section
990    ExpectChecksum(html2_checksum);
991    // Data section
992    ExpectByte(0x01);  // ADD size 0
993    ExpectSize(strlen(kTarget));
994    ExpectString(kTarget);
995  }
996  ExpectNoMoreBytes();
997}
998
999TEST_F(VCDiffHTML2Test, MatchCounts) {
1000  StreamingEncode();
1001  encoder_.GetMatchCounts(&actual_match_counts_);
1002  if (BlockHash::kBlockSize <= 8) {
1003    ExpectMatch(14);
1004  }
1005  VerifyMatchCounts();
1006}
1007
1008}  // anonymous namespace
1009}  // namespace open_vcdiff
1010