// filter.h revision effb81e5f8246d0db0270817048dc992db66e9fb
// Copyright 2014 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// Filter performs filtering on data streams. Sample usage:
//
//   IStream* pre_filter_source;
//   ...
//   Filter* filter = Filter::Factory(filter_types, filter_context);
//   int pre_filter_data_len = filter->stream_buffer_size();
//   pre_filter_source->read(filter->stream_buffer(), pre_filter_data_len);
//
//   filter->FlushStreamBuffer(pre_filter_data_len);
//
//   char post_filter_buf[kBufferSize];
//   int post_filter_data_len = kBufferSize;
//   filter->ReadData(post_filter_buf, &post_filter_data_len);
//
// To filter a data stream, the caller first gets the filter's stream_buffer_
// through its accessor and fills in stream_buffer_ with pre-filter data, next
// calls FlushStreamBuffer to notify Filter, then calls ReadData repeatedly to
// get all the filtered data. After all data have been filtered and read out,
// the caller may fill in stream_buffer_ again. This WriteBuffer-Flush-Read
// cycle is repeated until reaching the end of the data stream.
//
// The lifetime of a Filter instance is completely controlled by its caller.

29#ifndef NET_FILTER_FILTER_H__
30#define NET_FILTER_FILTER_H__
31
32#include <string>
33#include <vector>
34
35#include "base/basictypes.h"
36#include "base/gtest_prod_util.h"
37#include "base/memory/ref_counted.h"
38#include "base/memory/scoped_ptr.h"
39#include "base/time/time.h"
40#include "net/base/net_export.h"
41
42class GURL;
43
44namespace net {
45
46class IOBuffer;
47
48//------------------------------------------------------------------------------
49// Define an interface class that allows access to contextual information
50// supplied by the owner of this filter.  In the case where there are a chain of
51// filters, there is only one owner of all the chained filters, and that context
52// is passed to the constructor of all those filters.  To be clear, the context
53// does NOT reflect the position in a chain, or the fact that there are prior
54// or later filters in a chain.
55class NET_EXPORT_PRIVATE FilterContext {
56 public:
57  // Enum to control what histograms are emitted near end-of-life of this
58  // instance.
59  enum StatisticSelector {
60    SDCH_DECODE,
61    SDCH_PASSTHROUGH,
62    SDCH_EXPERIMENT_DECODE,
63    SDCH_EXPERIMENT_HOLDBACK,
64  };
65
66  virtual ~FilterContext();
67
68  // What mime type was specified in the header for this data?
69  // Only makes senses for some types of contexts, and returns false
70  // when not applicable.
71  virtual bool GetMimeType(std::string* mime_type) const = 0;
72
73  // What URL was used to access this data?
74  // Return false if gurl is not present.
75  virtual bool GetURL(GURL* gurl) const = 0;
76
77  // What Content-Disposition header came with this data?
78  // Return false if no header was present.
79  virtual bool GetContentDisposition(std::string* disposition) const = 0;
80
81  // When was this data requested from a server?
82  virtual base::Time GetRequestTime() const = 0;
83
84  // Is data supplied from cache, or fresh across the net?
85  virtual bool IsCachedContent() const = 0;
86
87  // Is this a download?
88  virtual bool IsDownload() const = 0;
89
90  // Was this data flagged as a response to a request with an SDCH dictionary?
91  virtual bool IsSdchResponse() const = 0;
92
93  // How many bytes were read from the net or cache so far (and potentially
94  // pushed into a filter for processing)?
95  virtual int64 GetByteReadCount() const = 0;
96
97  // What response code was received with the associated network transaction?
98  // For example: 200 is ok.   4xx are error codes. etc.
99  virtual int GetResponseCode() const = 0;
100
101  // The following method forces the context to emit a specific set of
102  // statistics as selected by the argument.
103  virtual void RecordPacketStats(StatisticSelector statistic) const = 0;
104};
105
106//------------------------------------------------------------------------------
107class NET_EXPORT_PRIVATE Filter {
108 public:
109  // Return values of function ReadFilteredData.
110  enum FilterStatus {
111    // Read filtered data successfully
112    FILTER_OK,
113    // Read filtered data successfully, and the data in the buffer has been
114    // consumed by the filter, but more data is needed in order to continue
115    // filtering.  At this point, the caller is free to reuse the filter
116    // buffer to provide more data.
117    FILTER_NEED_MORE_DATA,
118    // Read filtered data successfully, and filter reaches the end of the data
119    // stream.
120    FILTER_DONE,
121    // There is an error during filtering.
122    FILTER_ERROR
123  };
124
125  // Specifies type of filters that can be created.
126  enum FilterType {
127    FILTER_TYPE_DEFLATE,
128    FILTER_TYPE_GZIP,
129    FILTER_TYPE_GZIP_HELPING_SDCH,  // Gzip possible, but pass through allowed.
130    FILTER_TYPE_SDCH,
131    FILTER_TYPE_SDCH_POSSIBLE,  // Sdch possible, but pass through allowed.
132    FILTER_TYPE_UNSUPPORTED,
133  };
134
135  virtual ~Filter();
136
137  // Creates a Filter object.
138  // Parameters: Filter_types specifies the type of filter created;
139  // filter_context allows filters to acquire additional details needed for
140  // construction and operation, such as a specification of requisite input
141  // buffer size.
142  // If success, the function returns the pointer to the Filter object created.
143  // If failed or a filter is not needed, the function returns NULL.
144  //
145  // Note: filter_types is an array of filter types (content encoding types as
146  // provided in an HTTP header), which will be chained together serially to do
147  // successive filtering of data.  The types in the vector are ordered based on
148  // encoding order, and the filters are chained to operate in the reverse
149  // (decoding) order. For example, types[0] = FILTER_TYPE_SDCH,
150  // types[1] = FILTER_TYPE_GZIP will cause data to first be gunzip filtered,
151  // and the resulting output from that filter will be sdch decoded.
152  static Filter* Factory(const std::vector<FilterType>& filter_types,
153                         const FilterContext& filter_context);
154
155  // A simpler version of Factory() which creates a single, unchained
156  // Filter of type FILTER_TYPE_GZIP, or NULL if the filter could not be
157  // initialized.
158  static Filter* GZipFactory();
159
160  // External call to obtain data from this filter chain.  If ther is no
161  // next_filter_, then it obtains data from this specific filter.
162  FilterStatus ReadData(char* dest_buffer, int* dest_len);
163
164  // Returns a pointer to the stream_buffer_.
165  IOBuffer* stream_buffer() const { return stream_buffer_.get(); }
166
167  // Returns the maximum size of stream_buffer_ in number of chars.
168  int stream_buffer_size() const { return stream_buffer_size_; }
169
170  // Returns the total number of chars remaining in stream_buffer_ to be
171  // filtered.
172  //
173  // If the function returns 0 then all data has been filtered, and the caller
174  // is safe to copy new data into stream_buffer_.
175  int stream_data_len() const { return stream_data_len_; }
176
177  // Flushes stream_buffer_ for next round of filtering. After copying data to
178  // stream_buffer_, the caller should call this function to notify Filter to
179  // start filtering. Then after this function is called, the caller can get
180  // post-filtered data using ReadFilteredData. The caller must not write to
181  // stream_buffer_ and call this function again before stream_buffer_ is
182  // emptied out by ReadFilteredData.
183  //
184  // The input stream_data_len is the length (in number of chars) of valid
185  // data in stream_buffer_. It can not be greater than stream_buffer_size_.
186  // The function returns true if success, and false otherwise.
187  bool FlushStreamBuffer(int stream_data_len);
188
189  // Translate the text of a filter name (from Content-Encoding header) into a
190  // FilterType.
191  static FilterType ConvertEncodingToType(const std::string& filter_type);
192
193  // Given a array of encoding_types, try to do some error recovery adjustment
194  // to the list.  This includes handling known bugs in the Apache server (where
195  // redundant gzip encoding is specified), as well as issues regarding SDCH
196  // encoding, where various proxies and anti-virus products modify or strip the
197  // encodings.  These fixups require context, which includes whether this
198  // response was made to an SDCH request (i.e., an available dictionary was
199  // advertised in the GET), as well as the mime type of the content.
200  static void FixupEncodingTypes(const FilterContext& filter_context,
201                                 std::vector<FilterType>* encoding_types);
202
203 protected:
204  friend class GZipUnitTest;
205  friend class SdchFilterChainingTest;
206
207  Filter();
208
209  // Filters the data stored in stream_buffer_ and writes the output into the
210  // dest_buffer passed in.
211  //
212  // Upon entry, *dest_len is the total size (in number of chars) of the
213  // destination buffer. Upon exit, *dest_len is the actual number of chars
214  // written into the destination buffer.
215  //
216  // This function will fail if there is no pre-filter data in the
217  // stream_buffer_. On the other hand, *dest_len can be 0 upon successful
218  // return. For example, a decoding filter may process some pre-filter data
219  // but not produce output yet.
220  virtual FilterStatus ReadFilteredData(char* dest_buffer, int* dest_len) = 0;
221
222  // Copy pre-filter data directly to destination buffer without decoding.
223  FilterStatus CopyOut(char* dest_buffer, int* dest_len);
224
225  FilterStatus last_status() const { return last_status_; }
226
227  // Buffer to hold the data to be filtered (the input queue).
228  scoped_refptr<IOBuffer> stream_buffer_;
229
230  // Maximum size of stream_buffer_ in number of chars.
231  int stream_buffer_size_;
232
233  // Pointer to the next data in stream_buffer_ to be filtered.
234  char* next_stream_data_;
235
236  // Total number of remaining chars in stream_buffer_ to be filtered.
237  int stream_data_len_;
238
239 private:
240  // Allocates and initializes stream_buffer_ and stream_buffer_size_.
241  void InitBuffer(int size);
242
243  // A factory helper for creating filters for within a chain of potentially
244  // multiple encodings.  If a chain of filters is created, then this may be
245  // called multiple times during the filter creation process.  In most simple
246  // cases, this is only called once. Returns NULL and cleans up (deleting
247  // filter_list) if a new filter can't be constructed.
248  static Filter* PrependNewFilter(FilterType type_id,
249                                  const FilterContext& filter_context,
250                                  int buffer_size,
251                                  Filter* filter_list);
252
253  // Helper methods for PrependNewFilter. If initialization is successful,
254  // they return a fully initialized Filter. Otherwise, return NULL.
255  static Filter* InitGZipFilter(FilterType type_id, int buffer_size);
256  static Filter* InitSdchFilter(FilterType type_id,
257                                const FilterContext& filter_context,
258                                int buffer_size);
259
260  // Helper function to empty our output into the next filter's input.
261  void PushDataIntoNextFilter();
262
263  // Constructs a filter with an internal buffer of the given size.
264  // Only meant to be called by unit tests that need to control the buffer size.
265  static Filter* FactoryForTests(const std::vector<FilterType>& filter_types,
266                                 const FilterContext& filter_context,
267                                 int buffer_size);
268
269  // An optional filter to process output from this filter.
270  scoped_ptr<Filter> next_filter_;
271  // Remember what status or local filter last returned so we can better handle
272  // chained filters.
273  FilterStatus last_status_;
274
275  DISALLOW_COPY_AND_ASSIGN(Filter);
276};
277
278}  // namespace net
279
280#endif  // NET_FILTER_FILTER_H__
281