172a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen// Copyright (c) 2011 The Chromium Authors. All rights reserved.
2c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// Use of this source code is governed by a BSD-style license that can be
3c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// found in the LICENSE file.
4c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
// Provides global database of differential decompression dictionaries for the
// SDCH filter (processes sdch encoded content).
7c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
8c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// Exactly one instance of SdchManager is built, and all references are made
9c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// into that collection.
10c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott//
11c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// The SdchManager maintains a collection of memory resident dictionaries.  It
12c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// can find a dictionary (based on a server specification of a hash), store a
13c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// dictionary, and make judgements about what URLs can use, set, etc. a
14c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// dictionary.
15c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
16c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// These dictionaries are acquired over the net, and include a header
17c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// (containing metadata) as well as a VCDIFF dictionary (for use by a VCDIFF
18c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// module) to decompress data.
19c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
20c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#ifndef NET_BASE_SDCH_MANAGER_H_
21c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#define NET_BASE_SDCH_MANAGER_H_
223345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick#pragma once
23c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
24c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include <map>
25c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include <set>
26c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include <string>
27c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
283345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick#include "base/gtest_prod_util.h"
29ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen#include "base/memory/ref_counted.h"
30ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen#include "base/memory/scoped_ptr.h"
31c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include "base/time.h"
32c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include "googleurl/src/gurl.h"
33c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
3472a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsennamespace net {
3572a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen
36c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott//------------------------------------------------------------------------------
37c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// Create a public interface to help us load SDCH dictionaries.
38c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// The SdchManager class allows registration to support this interface.
39c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// A browser may register a fetcher that is used by the dictionary managers to
40c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// get data from a specified URL.  This allows us to use very high level browser
41c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// functionality in this base (when the functionaity can be provided).
42c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottclass SdchFetcher {
43c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott public:
44c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  SdchFetcher() {}
45c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  virtual ~SdchFetcher() {}
46c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
47c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // The Schedule() method is called when there is a need to get a dictionary
48c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // from a server.  The callee is responsible for getting that dictionary_text,
49c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // and then calling back to AddSdchDictionary() to the SdchManager instance.
50c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  virtual void Schedule(const GURL& dictionary_url) = 0;
51c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott private:
52c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  DISALLOW_COPY_AND_ASSIGN(SdchFetcher);
53c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott};
5472a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen
55c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott//------------------------------------------------------------------------------
56c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
57c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottclass SdchManager {
58c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott public:
// A list of errors that appeared and were either resolved, or used to turn
// off sdch encoding.
//
// Every code carries an explicit numeric value.  Entries marked "defunct" are
// kept as commented-out placeholders (presumably so that their values are not
// handed out again -- confirm before reusing one).  MIN_PROBLEM_CODE and
// MAX_PROBLEM_CODE are range markers, not real problems.
enum ProblemCodes {
  MIN_PROBLEM_CODE = 0,

  // Content-encoding correction problems.
  ADDED_CONTENT_ENCODING = 1,
  FIXED_CONTENT_ENCODING = 2,
  FIXED_CONTENT_ENCODINGS = 3,

  // Content decoding errors.
  DECODE_HEADER_ERROR = 4,
  DECODE_BODY_ERROR = 5,

  // More content-encoding correction problems.
  OPTIONAL_GUNZIP_ENCODING_ADDED = 6,

  // Content encoding correction when we're not even tagged as HTML!?!
  BINARY_ADDED_CONTENT_ENCODING = 7,
  BINARY_FIXED_CONTENT_ENCODING = 8,
  BINARY_FIXED_CONTENT_ENCODINGS = 9,

  // Dictionary selection for use problems.
  DICTIONARY_FOUND_HAS_WRONG_DOMAIN = 10,
  DICTIONARY_FOUND_HAS_WRONG_PORT_LIST = 11,
  DICTIONARY_FOUND_HAS_WRONG_PATH = 12,
  DICTIONARY_FOUND_HAS_WRONG_SCHEME = 13,
  DICTIONARY_HASH_NOT_FOUND = 14,
  DICTIONARY_HASH_MALFORMED = 15,

  // Dictionary saving problems.
  DICTIONARY_HAS_NO_HEADER = 20,
  DICTIONARY_HEADER_LINE_MISSING_COLON = 21,
  DICTIONARY_MISSING_DOMAIN_SPECIFIER = 22,
  DICTIONARY_SPECIFIES_TOP_LEVEL_DOMAIN = 23,
  DICTIONARY_DOMAIN_NOT_MATCHING_SOURCE_URL = 24,
  DICTIONARY_PORT_NOT_MATCHING_SOURCE_URL = 25,
  DICTIONARY_HAS_NO_TEXT = 26,
  DICTIONARY_REFERER_URL_HAS_DOT_IN_PREFIX = 27,

  // Dictionary loading problems.
  DICTIONARY_LOAD_ATTEMPT_FROM_DIFFERENT_HOST = 30,
  DICTIONARY_SELECTED_FOR_SSL = 31,
  DICTIONARY_ALREADY_LOADED = 32,
  DICTIONARY_SELECTED_FROM_NON_HTTP = 33,
  DICTIONARY_IS_TOO_LARGE = 34,
  DICTIONARY_COUNT_EXCEEDED = 35,
  DICTIONARY_ALREADY_SCHEDULED_TO_DOWNLOAD = 36,
  DICTIONARY_ALREADY_TRIED_TO_DOWNLOAD = 37,

  // Failsafe hack.
  ATTEMPT_TO_DECODE_NON_HTTP_DATA = 40,

  // Content-Encoding problems detected, with no action taken.
  MULTIENCODING_FOR_NON_SDCH_REQUEST = 50,
  SDCH_CONTENT_ENCODE_FOR_NON_SDCH_REQUEST = 51,

  // Dictionary manager issues.
  DOMAIN_BLACKLIST_INCLUDES_TARGET = 61,

  // Problematic decode recovery methods.
  META_REFRESH_RECOVERY = 70,            // Dictionary not found.
  // defunct = 71,  // Almost the same as META_REFRESH_UNSUPPORTED.
  // defunct = 72,  // Almost the same as CACHED_META_REFRESH_UNSUPPORTED.
  // defunct = 73,  // PASSING_THROUGH_NON_SDCH plus DISCARD_TENTATIVE_SDCH.
  META_REFRESH_UNSUPPORTED = 74,         // Unrecoverable error.
  CACHED_META_REFRESH_UNSUPPORTED = 75,  // As above, but pulled from cache.
  PASSING_THROUGH_NON_SDCH = 76,  // Tagged sdch but missing dictionary-hash.
  INCOMPLETE_SDCH_CONTENT = 77,   // Last window was not completely decoded.
  PASS_THROUGH_404_CODE = 78,     // URL not found message passing through.

  // This next report is very common, and not really an error scenario, but
  // it exercises the error recovery logic.
  PASS_THROUGH_OLD_CACHED = 79,   // Back button got pre-SDCH cached content.

  // Common decoded recovery methods.
  META_REFRESH_CACHED_RECOVERY = 80,  // Probably startup tab loading.
  DISCARD_TENTATIVE_SDCH = 81,        // Server decided not to use sdch.

  // Non SDCH problems, only accounted for to make stat counting complete
  // (i.e., be able to be sure all dictionary advertisements are accounted
  // for).
  UNFLUSHED_CONTENT = 90,    // Possible error in filter chaining.
  // defunct = 91,           // MISSING_TIME_STATS (Should never happen.)
  CACHE_DECODED = 92,        // No timing stats recorded.
  // defunct = 93,           // OVER_10_MINUTES (No timing stats recorded.)
  UNINITIALIZED = 94,        // Filter never even got initialized.
  PRIOR_TO_DICTIONARY = 95,  // We hadn't even parsed a dictionary selector.
  DECODE_ERROR = 96,         // Something went wrong during decode.

  // Problem during the latency test.
  LATENCY_TEST_DISALLOWED = 100,  // SDCH now failing, but it worked before!

  MAX_PROBLEM_CODE  // Used to bound histogram.
};
156c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
157c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Use the following static limits to block DOS attacks until we implement
158c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // a cached dictionary evicition strategy.
159c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  static const size_t kMaxDictionarySize;
160c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  static const size_t kMaxDictionaryCount;
161c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
162c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // There is one instance of |Dictionary| for each memory-cached SDCH
163c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // dictionary.
164c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  class Dictionary : public base::RefCounted<Dictionary> {
165c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott   public:
166c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    // Sdch filters can get our text to use in decoding compressed data.
167c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    const std::string& text() const { return text_; }
168c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
169c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott   private:
170c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    friend class base::RefCounted<Dictionary>;
171c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    friend class SdchManager;  // Only manager can construct an instance.
1723345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick    FRIEND_TEST_ALL_PREFIXES(SdchFilterTest, PathMatch);
173c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
174c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    // Construct a vc-diff usable dictionary from the dictionary_text starting
175c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    // at the given offset.  The supplied client_hash should be used to
176c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    // advertise the dictionary's availability relative to the suppplied URL.
17772a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen    Dictionary(const std::string& dictionary_text,
17872a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen               size_t offset,
17972a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen               const std::string& client_hash,
18072a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen               const GURL& url,
18172a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen               const std::string& domain,
18272a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen               const std::string& path,
18372a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen               const base::Time& expiration,
18472a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen               const std::set<int>& ports);
1853345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick    ~Dictionary();
186c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
187c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    const GURL& url() const { return url_; }
188c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    const std::string& client_hash() const { return client_hash_; }
189c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
190c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    // Security method to check if we can advertise this dictionary for use
191c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    // if the |target_url| returns SDCH compressed data.
192c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    bool CanAdvertise(const GURL& target_url);
193c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
194c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    // Security methods to check if we can establish a new dictionary with the
195c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    // given data, that arrived in response to get of dictionary_url.
196c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    static bool CanSet(const std::string& domain, const std::string& path,
19772a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen                       const std::set<int>& ports, const GURL& dictionary_url);
198c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
199c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    // Security method to check if we can use a dictionary to decompress a
200c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    // target that arrived with a reference to this dictionary.
201c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    bool CanUse(const GURL& referring_url);
202c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
203c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    // Compare paths to see if they "match" for dictionary use.
204c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    static bool PathMatch(const std::string& path,
205c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                          const std::string& restriction);
206c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
207c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    // Compare domains to see if the "match" for dictionary use.
208c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    static bool DomainMatch(const GURL& url, const std::string& restriction);
209c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
210c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
211c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    // The actual text of the dictionary.
212c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    std::string text_;
213c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
214c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    // Part of the hash of text_ that the client uses to advertise the fact that
215c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    // it has a specific dictionary pre-cached.
216c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    std::string client_hash_;
217c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
218c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    // The GURL that arrived with the text_ in a URL request to specify where
219c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    // this dictionary may be used.
220c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    const GURL url_;
221c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
222c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    // Metadate "headers" in before dictionary text contained the following:
223c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    // Each dictionary payload consists of several headers, followed by the text
224c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    // of the dictionary.  The following are the known headers.
225c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    const std::string domain_;
226c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    const std::string path_;
227c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    const base::Time expiration_;  // Implied by max-age.
228c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    const std::set<int> ports_;
229c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
230c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    DISALLOW_COPY_AND_ASSIGN(Dictionary);
231c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  };
232c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
233c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  SdchManager();
234c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  ~SdchManager();
235c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
236c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Discontinue fetching of dictionaries, as we're now shutting down.
237c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  static void Shutdown();
238c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
239c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Provide access to the single instance of this class.
240c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  static SdchManager* Global();
241c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
242c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Record stats on various errors.
243c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  static void SdchErrorRecovery(ProblemCodes problem);
244c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
245c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Register a fetcher that this class can use to obtain dictionaries.
246c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  void set_sdch_fetcher(SdchFetcher* fetcher) { fetcher_.reset(fetcher); }
247c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
// If called with an empty string, advertise and support sdch on all domains.
// If called with a specific string, advertise and support only the specified
// domain.  Function assumes the existence of a global SdchManager instance.
void EnableSdchSupport(const std::string& domain);
252c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
253c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  static bool sdch_enabled() { return global_ && global_->sdch_enabled_; }
254c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
255c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Briefly prevent further advertising of SDCH on this domain (if SDCH is
256c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // enabled). After enough calls to IsInSupportedDomain() the blacklisting
257c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // will be removed.  Additional blacklists take exponentially more calls
258c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // to IsInSupportedDomain() before the blacklisting is undone.
259c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Used when filter errors are found from a given domain, but it is plausible
260c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // that the cause is temporary (such as application startup, where cached
261c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // entries are used, but a dictionary is not yet loaded).
262c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  static void BlacklistDomain(const GURL& url);
263c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
264c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Used when SEVERE filter errors are found from a given domain, to prevent
265c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // further use of SDCH on that domain.
266c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  static void BlacklistDomainForever(const GURL& url);
267c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
268c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Unit test only, this function resets enabling of sdch, and clears the
269c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // blacklist.
270c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  static void ClearBlacklistings();
271c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
272c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Unit test only, this function resets the blacklisting count for a domain.
273c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  static void ClearDomainBlacklisting(const std::string& domain);
274c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
275c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Unit test only: indicate how many more times a domain will be blacklisted.
276c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  static int BlackListDomainCount(const std::string& domain);
277c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
278c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Unit test only: Indicate what current blacklist increment is for a domain.
279c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  static int BlacklistDomainExponential(const std::string& domain);
280c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
281c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Check to see if SDCH is enabled (globally), and the given URL is in a
282c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // supported domain (i.e., not blacklisted, and either the specific supported
283c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // domain, or all domains were assumed supported).  If it is blacklist, reduce
284c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // by 1 the number of times it will be reported as blacklisted.
285c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  bool IsInSupportedDomain(const GURL& url);
286c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
287c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Schedule the URL fetching to load a dictionary. This will always return
288c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // before the dictionary is actually loaded and added.
289c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // After the implied task does completes, the dictionary will have been
290c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // cached in memory.
291c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  void FetchDictionary(const GURL& request_url, const GURL& dictionary_url);
292c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
293c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Security test function used before initiating a FetchDictionary.
294c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Return true if fetch is legal.
295c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  bool CanFetchDictionary(const GURL& referring_url,
296c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                          const GURL& dictionary_url) const;
297c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
298c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Add an SDCH dictionary to our list of availible dictionaries. This addition
299c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // will fail (return false) if addition is illegal (data in the dictionary is
300c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // not acceptable from the dictionary_url; dictionary already added, etc.).
301c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  bool AddSdchDictionary(const std::string& dictionary_text,
302c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                         const GURL& dictionary_url);
303c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
304c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Find the vcdiff dictionary (the body of the sdch dictionary that appears
305c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // after the meta-data headers like Domain:...) with the given |server_hash|
306c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // to use to decompreses data that arrived as SDCH encoded content.  Check to
307c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // be sure the returned |dictionary| can be used for decoding content supplied
308c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // in response to a request for |referring_url|.
309c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Caller is responsible for AddRef()ing the dictionary, and Release()ing it
310c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // when done.
311c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Return null in |dictionary| if there is no matching legal dictionary.
312c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  void GetVcdiffDictionary(const std::string& server_hash,
313c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                           const GURL& referring_url,
314c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                           Dictionary** dictionary);
315c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
316c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Get list of available (pre-cached) dictionaries that we have already loaded
317c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // into memory.  The list is a comma separated list of (client) hashes per
318c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // the SDCH spec.
319c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  void GetAvailDictionaryList(const GURL& target_url, std::string* list);
320c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
321c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Construct the pair of hashes for client and server to identify an SDCH
322c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // dictionary.  This is only made public to facilitate unit testing, but is
323c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // otherwise private
324c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  static void GenerateHash(const std::string& dictionary_text,
325c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                           std::string* client_hash, std::string* server_hash);
326c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
327c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // For latency testing only: we need to know whether we've succeeded in doing
328c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // a round trip before starting our comparative tests.  If we ever encounter
329c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // problems with SDCH, we opt out of the test unless/until we perform a
330c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // complete SDCH decoding.
331c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  bool AllowLatencyExperiment(const GURL& url) const;
332c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
  // NOTE(review): presumably records, according to |enable|, whether the
  // latency experiment is allowed for |url|; confirm against the
  // implementation.
333c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  void SetAllowLatencyExperiment(const GURL& url, bool enable);
334c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
335c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott private:
  // Associates a string key (a domain, as used by the members below) with an
  // integer count.
336c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  typedef std::map<std::string, int> DomainCounter;
  // A set of strings; used below to hold the hostnames for which a latency
  // experiment is allowed.
337c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  typedef std::set<std::string> ExperimentSet;
338c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
339c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // A map of dictionary info, indexed by the hash that the server provides.
340c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  typedef std::map<std::string, Dictionary*> DictionaryMap;
341c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
342c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // The one global instance that holds all the data.
343c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  static SdchManager* global_;
344c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
345c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // A simple implementation of an RFC 3548 "URL safe" base64 encoder.
  // Encodes |input| and stores the result in |output|.
346c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  static void UrlSafeBase64Encode(const std::string& input,
347c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                                  std::string* output);
  // The dictionaries known to this manager; see DictionaryMap for how they
  // are indexed.
348c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  DictionaryMap dictionaries_;
349c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
350c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // An instance that can fetch a dictionary given a URL.  Held in a
  // scoped_ptr, so it is owned by this manager.
351c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  scoped_ptr<SdchFetcher> fetcher_;
352c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
353c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Whether to support SDCH compression, by advertising it in headers.
354c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  bool sdch_enabled_;
355c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
356c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Empty string means all domains.  Non-empty means that only the given
357c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // domain is supported.
358c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  std::string supported_domain_;
359c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
360c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Domains where decode failures have required disabling sdch, along with a
361c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // count of how many additional uses should be blacklisted.
362c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  DomainCounter blacklisted_domains_;
363c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
364c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Support exponential backoff in the number of domain accesses before
365c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // blacklisting expires.
  // NOTE(review): unlike the other data members, this name lacks the trailing
  // underscore; renaming it requires updating every use in the implementation.
366c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  DomainCounter exponential_blacklist_count;
367c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
368c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Set of hostnames for which a latency experiment is allowed (because a
369c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // round trip test has recently passed).
370c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  ExperimentSet allow_latency_experiment_;
371c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
  // Copying and assignment of SdchManager are disallowed.
372c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  DISALLOW_COPY_AND_ASSIGN(SdchManager);
373c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott};
374c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
37572a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen}  // namespace net
37672a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen
377c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#endif  // NET_BASE_SDCH_MANAGER_H_
378