172a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen// Copyright (c) 2011 The Chromium Authors. All rights reserved.
2c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// Use of this source code is governed by a BSD-style license that can be
3c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// found in the LICENSE file.
4c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
5c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// SdchFilter applies open_vcdiff content decoding to a datastream.
6c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// This decoding uses a pre-cached dictionary of text fragments to decode
7c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// (expand) the stream back to its original contents.
8c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott//
9c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// This SdchFilter internally uses open_vcdiff/vcdec library to do decoding.
10c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott//
11c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// SdchFilter is also a subclass of Filter. See the latter's header file
12c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// filter.h for sample usage.
13c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
14c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#ifndef NET_BASE_SDCH_FILTER_H_
15c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#define NET_BASE_SDCH_FILTER_H_
163345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick#pragma once
17c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
18c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include <string>
19c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
20ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen#include "base/memory/scoped_ptr.h"
21c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include "net/base/filter.h"
22c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include "net/base/sdch_manager.h"
23c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
24c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottnamespace open_vcdiff {
2572a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsenclass VCDiffStreamingDecoder;
26c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
27c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
2872a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsennamespace net {
2972a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen
30c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottclass SdchFilter : public Filter {
31c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott public:
32c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  virtual ~SdchFilter();
33c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
34c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Initializes filter decoding mode and internal control blocks.
35c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  bool InitDecoding(Filter::FilterType filter_type);
36c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
37c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Decode the pre-filter data and writes the output into |dest_buffer|
38c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // The function returns FilterStatus. See filter.h for its description.
39c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  //
40c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Upon entry, *dest_len is the total size (in number of chars) of the
41c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // destination buffer. Upon exit, *dest_len is the actual number of chars
42c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // written into the destination buffer.
43c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  virtual FilterStatus ReadFilteredData(char* dest_buffer, int* dest_len);
44c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
45c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott private:
46c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Internal status.  Once we enter an error state, we stop processing data.
47c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  enum DecodingStatus {
48c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    DECODING_UNINITIALIZED,
49c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    WAITING_FOR_DICTIONARY_SELECTION,
50c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    DECODING_IN_PROGRESS,
51c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    DECODING_ERROR,
52c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    META_REFRESH_RECOVERY,  // Decoding error being handled by a meta-refresh.
53c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    PASS_THROUGH,  // Non-sdch content being passed without alteration.
54c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  };
55c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
56ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  // Only to be instantiated by Filter::Factory.
57ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  explicit SdchFilter(const FilterContext& filter_context);
58ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  friend class Filter;
59ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen
60c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Identify the suggested dictionary, and initialize underlying decompressor.
61c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  Filter::FilterStatus InitializeDictionary();
62c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
63c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Move data that was internally buffered (after decompression) to the
64c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // specified dest_buffer.
65c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  int OutputBufferExcess(char* const dest_buffer, size_t available_space);
66c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
67ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  // Context data from the owner of this filter.
68ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  const FilterContext& filter_context_;
69ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen
70c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Tracks the status of decoding.
71c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // This variable is initialized by InitDecoding and updated only by
72c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // ReadFilteredData.
73c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  DecodingStatus decoding_status_;
74c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
75c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // The underlying decoder that processes data.
76c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // This data structure is initialized by InitDecoding and updated in
77c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // ReadFilteredData.
78c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  scoped_ptr<open_vcdiff::VCDiffStreamingDecoder> vcdiff_streaming_decoder_;
79c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
80c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // In case we need to assemble the hash piecemeal, we have a place to store
81c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // a part of the hash until we "get all 8 bytes plus a null."
82c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  std::string dictionary_hash_;
83c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
84c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // After assembling an entire dictionary hash (the first 9 bytes of the
85c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // sdch payload, we check to see if it is plausible, meaning it has a null
86c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // termination, and has 8 characters that are possible in a net-safe base64
87c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // encoding.  If the hash is not plausible, then the payload is probably not
88c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // an SDCH encoded bundle, and various error recovery strategies can be
89c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // attempted.
90c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  bool dictionary_hash_is_plausible_;
91c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
92c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // We hold an in-memory copy of the dictionary during the entire decoding, as
93c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // it is used directly by the VC-DIFF decoding system.
94c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // That char* data is part of the dictionary_ we hold a reference to.
95c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  scoped_refptr<SdchManager::Dictionary> dictionary_;
96c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
97c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // The decoder may demand a larger output buffer than the target of
98c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // ReadFilteredData so we buffer the excess output between calls.
99c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  std::string dest_buffer_excess_;
100c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // To avoid moving strings around too much, we save the index into
101c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // dest_buffer_excess_ that has the next byte to output.
102c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  size_t dest_buffer_excess_index_;
103c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
104c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // To get stats on activities, we keep track of source and target bytes.
105c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Visit about:histograms/Sdch to see histogram data.
106c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  size_t source_bytes_;
107c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  size_t output_bytes_;
108c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
109c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Error recovery in content type may add an sdch filter type, in which case
110c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // we should gracefully perform pass through if the format is incorrect, or
111c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // an applicable dictionary can't be found.
112c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  bool possible_pass_through_;
113c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
114c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // The URL that is currently being filtered.
115c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // This is used to restrict use of a dictionary to a specific URL or path.
116c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  GURL url_;
117c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
118c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // To facilitate error recovery, allow filter to know if content is text/html
119c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // by checking within this mime type (we may do a meta-refresh via html).
120c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  std::string mime_type_;
121c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
122c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  DISALLOW_COPY_AND_ASSIGN(SdchFilter);
123c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott};
124c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
12572a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen}  // namespace net
12672a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen
127c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#endif  // NET_BASE_SDCH_FILTER_H_
128