1// Copyright 2008 Google Inc.
2// Author: Lincoln Smith
3//
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// You may obtain a copy of the License at
7//
8//      http://www.apache.org/licenses/LICENSE-2.0
9//
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15//
16// Classes to implement the Code Table
17// described in sections 5.5, 5.6 and 7 of
18// RFC 3284 - The VCDIFF Generic Differencing and Compression Data Format.
19// The RFC text can be found at http://www.faqs.org/rfcs/rfc3284.html
20
21#ifndef OPEN_VCDIFF_CODETABLE_H_
22#define OPEN_VCDIFF_CODETABLE_H_
23
24#include <config.h>
25#include <stdint.h>             // uint16_t
26
27namespace open_vcdiff {
28
// Delta instruction types, as listed in section 5.5 (mistakenly labeled 5.4)
// of the RFC, followed by two out-of-band status values.
//
enum VCDiffInstructionType {
  // Genuine instruction types.
  VCD_NOOP = 0,
  VCD_ADD  = 1,
  VCD_RUN  = 2,
  VCD_COPY = 3,
  // Alias for the highest-valued genuine instruction type.
  VCD_LAST_INSTRUCTION_TYPE = VCD_COPY,
  // Not instruction types: special condition values for functions whose
  // return type is VCDiffInstructionType.
  VCD_INSTRUCTION_ERROR = VCD_LAST_INSTRUCTION_TYPE + 1,       // == 4
  VCD_INSTRUCTION_END_OF_DATA = VCD_LAST_INSTRUCTION_TYPE + 2  // == 5
};
42
43const char* VCDiffInstructionName(VCDiffInstructionType inst);
44
// OpcodeOrNone: holds either a real opcode (a value between 0-255) or the
// kNoOpcode sentinel.  A single byte has no spare value left over to mean
// "no opcode found", so a 16-bit integer is used instead.
//
typedef uint16_t OpcodeOrNone;
// First value past the opcode range 0x00 - 0xFF (i.e. 0x100).
const OpcodeOrNone kNoOpcode = 0xFF + 1;
51
52// struct VCDiffCodeTableData:
53//
54// A representation of the VCDiff code table as six 256-byte arrays
55// as described in Section 7 of RFC 3284.  Each instruction code
56// can represent up to two delta instructions, which is why inst,
57// size, and mode each appear twice.  Each of the two delta instructions
58// has the following three attributes:
59// * inst (NOOP, ADD, RUN, or COPY)
60// * size (0-255 bytes) of the data to be copied; if this value is zero, then
61//   the size will be encoded separately from the instruction code, as a Varint
62// * mode (SELF, HERE, NEAR(n), or SAME(n)), only used for COPY instructions
63//
64// Every valid code table should contain AT LEAST the following instructions:
65//     inst1=ADD  size1=0 mode1=X inst2=NOOP size2=X mode2=X
66//     inst1=RUN  size1=0 mode1=X inst2=NOOP size2=X mode2=X
67//     inst1=COPY size1=0 mode1=N inst2=NOOP size2=X mode2=X (for all N)
68// ... where X represents a "don't care" value which will not be read,
// and N stands for every possible COPY mode between 0 and
// ([same cache size] + [near cache size] + 1) inclusive.
71// Without these instructions, it will be impossible to guarantee that
72// all ADD, RUN, and COPY encoding requests can be fulfilled.
73//
74struct VCDiffCodeTableData {
75  static const int kCodeTableSize = 256;
76
77  static const VCDiffCodeTableData kDefaultCodeTableData;
78
79  // Validates that the data contained in the VCDiffCodeTableData structure
80  // does not violate certain assumptions.  Returns true if none of these
81  // assumptions are violated, or false if an unexpected value is found.
82  // This function should be called on any non-default code table that is
83  // received as part of an encoded transmission.
84  // max_mode is the maximum value for the mode of a COPY instruction;
85  // this is equal to same_cache_size + near_cache_size + 1.
86  //
87  bool Validate(unsigned char max_mode) const;
88
89  // This version of Validate() assumes that the default address cache sizes
90  // are being used, and calculates max_mode based on that assumption.
91  bool Validate() const;
92
93  // The names of these elements are taken from RFC 3284 section 5.4
94  // (Instruction Codes), which contains the following specification:
95  //
96  // Each instruction code entry contains six fields, each of which is a single
97  // byte with an unsigned value:
98  // +-----------------------------------------------+
99  // | inst1 | size1 | mode1 | inst2 | size2 | mode2 |
100  // +-----------------------------------------------+
101  //
102  unsigned char inst1[kCodeTableSize];  // from enum VCDiffInstructionType
103  unsigned char inst2[kCodeTableSize];  // from enum VCDiffInstructionType
104  unsigned char size1[kCodeTableSize];
105  unsigned char size2[kCodeTableSize];
106  unsigned char mode1[kCodeTableSize];  // from enum VCDiffModes
107  unsigned char mode2[kCodeTableSize];  // from enum VCDiffModes
108
109 private:
110  // Single-letter abbreviations that make it easier to read
111  // the default code table data.
112  static const VCDiffInstructionType N = VCD_NOOP;
113  static const VCDiffInstructionType A = VCD_ADD;
114  static const VCDiffInstructionType R = VCD_RUN;
115  static const VCDiffInstructionType C = VCD_COPY;
116
117  static bool ValidateOpcode(int opcode,
118                             unsigned char inst,
119                             unsigned char size,
120                             unsigned char mode,
121                             unsigned char max_mode,
122                             const char* first_or_second);
123};
124
125}  // namespace open_vcdiff
126
127#endif  // OPEN_VCDIFF_CODETABLE_H_
128