1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "chrome/browser/policy/url_blacklist_manager.h"
6
7#include "base/bind.h"
8#include "base/files/file_path.h"
9#include "base/message_loop/message_loop.h"
10#include "base/prefs/pref_service.h"
11#include "base/stl_util.h"
12#include "base/strings/string_number_conversions.h"
13#include "base/values.h"
14#include "chrome/browser/chrome_notification_types.h"
15#include "chrome/common/net/url_fixer_upper.h"
16#include "chrome/common/pref_names.h"
17#include "components/user_prefs/pref_registry_syncable.h"
18#include "content/public/browser/browser_thread.h"
19#include "content/public/browser/notification_details.h"
20#include "content/public/browser/notification_source.h"
21#include "content/public/common/url_constants.h"
22#include "google_apis/gaia/gaia_urls.h"
23#include "net/base/load_flags.h"
24#include "net/base/net_util.h"
25#include "net/url_request/url_request.h"
26#include "url/gurl.h"
27
28#if !defined(OS_CHROMEOS)
29#include "chrome/browser/signin/signin_manager.h"
30#endif
31
32using content::BrowserThread;
33using url_matcher::URLMatcher;
34using url_matcher::URLMatcherCondition;
35using url_matcher::URLMatcherConditionFactory;
36using url_matcher::URLMatcherConditionSet;
37using url_matcher::URLMatcherPortFilter;
38using url_matcher::URLMatcherSchemeFilter;
39
40namespace policy {
41
42namespace {
43
// Maximum number of filters read from a single policy list. AddFilters()
// clamps the number of entries it processes to this value, so entries at or
// beyond this index are ignored.
const size_t kMaxFiltersPerPolicy = 1000;
46
47#if !defined(OS_CHROMEOS)
48
// URL path that IsSigninFlowURL() additionally whitelists when the URL is on
// the GAIA origin.
const char kServiceLoginAuth[] = "/ServiceLoginAuth";
50
51bool IsSigninFlowURL(const GURL& url) {
52  // Whitelist all the signin flow URLs flagged by the SigninManager.
53  if (SigninManager::IsWebBasedSigninFlowURL(url))
54    return true;
55
56  // Additionally whitelist /ServiceLoginAuth.
57  if (url.GetOrigin() != GaiaUrls::GetInstance()->gaia_url().GetOrigin())
58    return false;
59  return url.path() == kServiceLoginAuth;
60}
61
62#endif  // !defined(OS_CHROMEOS)
63
64// A task that builds the blacklist on the FILE thread.
65scoped_ptr<URLBlacklist> BuildBlacklist(scoped_ptr<base::ListValue> block,
66                                        scoped_ptr<base::ListValue> allow) {
67  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE));
68
69  scoped_ptr<URLBlacklist> blacklist(new URLBlacklist);
70  blacklist->Block(block.get());
71  blacklist->Allow(allow.get());
72  return blacklist.Pass();
73}
74
75}  // namespace
76
77struct URLBlacklist::FilterComponents {
78  FilterComponents() : port(0), match_subdomains(true), allow(true) {}
79  ~FilterComponents() {}
80
81  std::string scheme;
82  std::string host;
83  uint16 port;
84  std::string path;
85  bool match_subdomains;
86  bool allow;
87};
88
89URLBlacklist::URLBlacklist() : id_(0),
90                               url_matcher_(new URLMatcher) {
91}
92
93URLBlacklist::~URLBlacklist() {
94}
95
96void URLBlacklist::AddFilters(bool allow,
97                              const base::ListValue* list) {
98  URLMatcherConditionSet::Vector all_conditions;
99  size_t size = std::min(kMaxFiltersPerPolicy, list->GetSize());
100  for (size_t i = 0; i < size; ++i) {
101    std::string pattern;
102    bool success = list->GetString(i, &pattern);
103    DCHECK(success);
104    FilterComponents components;
105    components.allow = allow;
106    if (!FilterToComponents(pattern, &components.scheme, &components.host,
107                            &components.match_subdomains, &components.port,
108                            &components.path)) {
109      LOG(ERROR) << "Invalid pattern " << pattern;
110      continue;
111    }
112
113    all_conditions.push_back(
114        CreateConditionSet(url_matcher_.get(), ++id_, components.scheme,
115                           components.host, components.match_subdomains,
116                           components.port, components.path));
117    filters_[id_] = components;
118  }
119  url_matcher_->AddConditionSets(all_conditions);
120}
121
122void URLBlacklist::Block(const base::ListValue* filters) {
123  AddFilters(false, filters);
124}
125
126void URLBlacklist::Allow(const base::ListValue* filters) {
127  AddFilters(true, filters);
128}
129
130bool URLBlacklist::IsURLBlocked(const GURL& url) const {
131  std::set<URLMatcherConditionSet::ID> matching_ids =
132      url_matcher_->MatchURL(url);
133
134  const FilterComponents* max = NULL;
135  for (std::set<URLMatcherConditionSet::ID>::iterator id = matching_ids.begin();
136       id != matching_ids.end(); ++id) {
137    std::map<int, FilterComponents>::const_iterator it = filters_.find(*id);
138    DCHECK(it != filters_.end());
139    const FilterComponents& filter = it->second;
140    if (!max || FilterTakesPrecedence(filter, *max))
141      max = &filter;
142  }
143
144  // Default to allow.
145  if (!max)
146    return false;
147
148  return !max->allow;
149}
150
151size_t URLBlacklist::Size() const {
152  return filters_.size();
153}
154
155// static
156bool URLBlacklist::FilterToComponents(const std::string& filter,
157                                      std::string* scheme,
158                                      std::string* host,
159                                      bool* match_subdomains,
160                                      uint16* port,
161                                      std::string* path) {
162  url_parse::Parsed parsed;
163
164  if (URLFixerUpper::SegmentURL(filter, &parsed) == chrome::kFileScheme) {
165    base::FilePath file_path;
166    if (!net::FileURLToFilePath(GURL(filter), &file_path))
167      return false;
168
169    *scheme = chrome::kFileScheme;
170    host->clear();
171    *match_subdomains = true;
172    *port = 0;
173    // Special path when the |filter| is 'file://*'.
174    *path = (filter == "file://*") ? "" : file_path.AsUTF8Unsafe();
175#if defined(FILE_PATH_USES_WIN_SEPARATORS)
176    // Separators have to be canonicalized on Windows.
177    std::replace(path->begin(), path->end(), '\\', '/');
178    *path = "/" + *path;
179#endif
180    return true;
181  }
182
183  if (!parsed.host.is_nonempty())
184    return false;
185
186  if (parsed.scheme.is_nonempty())
187    scheme->assign(filter, parsed.scheme.begin, parsed.scheme.len);
188  else
189    scheme->clear();
190
191  host->assign(filter, parsed.host.begin, parsed.host.len);
192  // Special '*' host, matches all hosts.
193  if (*host == "*") {
194    host->clear();
195    *match_subdomains = true;
196  } else if ((*host)[0] == '.') {
197    // A leading dot in the pattern syntax means that we don't want to match
198    // subdomains.
199    host->erase(0, 1);
200    *match_subdomains = false;
201  } else {
202    url_canon::RawCanonOutputT<char> output;
203    url_canon::CanonHostInfo host_info;
204    url_canon::CanonicalizeHostVerbose(filter.c_str(), parsed.host,
205                                       &output, &host_info);
206    if (host_info.family == url_canon::CanonHostInfo::NEUTRAL) {
207      // We want to match subdomains. Add a dot in front to make sure we only
208      // match at domain component boundaries.
209      *host = "." + *host;
210      *match_subdomains = true;
211    } else {
212      *match_subdomains = false;
213    }
214  }
215
216  if (parsed.port.is_nonempty()) {
217    int int_port;
218    if (!base::StringToInt(filter.substr(parsed.port.begin, parsed.port.len),
219                           &int_port)) {
220      return false;
221    }
222    if (int_port <= 0 || int_port > kuint16max)
223      return false;
224    *port = int_port;
225  } else {
226    // Match any port.
227    *port = 0;
228  }
229
230  if (parsed.path.is_nonempty())
231    path->assign(filter, parsed.path.begin, parsed.path.len);
232  else
233    path->clear();
234
235  return true;
236}
237
238// static
239scoped_refptr<URLMatcherConditionSet> URLBlacklist::CreateConditionSet(
240    URLMatcher* url_matcher,
241    int id,
242    const std::string& scheme,
243    const std::string& host,
244    bool match_subdomains,
245    uint16 port,
246    const std::string& path) {
247  URLMatcherConditionFactory* condition_factory =
248      url_matcher->condition_factory();
249  std::set<URLMatcherCondition> conditions;
250  conditions.insert(match_subdomains ?
251      condition_factory->CreateHostSuffixPathPrefixCondition(host, path) :
252      condition_factory->CreateHostEqualsPathPrefixCondition(host, path));
253
254  scoped_ptr<URLMatcherSchemeFilter> scheme_filter;
255  if (!scheme.empty())
256    scheme_filter.reset(new URLMatcherSchemeFilter(scheme));
257
258  scoped_ptr<URLMatcherPortFilter> port_filter;
259  if (port != 0) {
260    std::vector<URLMatcherPortFilter::Range> ranges;
261    ranges.push_back(URLMatcherPortFilter::CreateRange(port));
262    port_filter.reset(new URLMatcherPortFilter(ranges));
263  }
264
265  return new URLMatcherConditionSet(id, conditions,
266                                    scheme_filter.Pass(), port_filter.Pass());
267}
268
269// static
270bool URLBlacklist::FilterTakesPrecedence(const FilterComponents& lhs,
271                                         const FilterComponents& rhs) {
272  if (lhs.match_subdomains && !rhs.match_subdomains)
273    return false;
274  if (!lhs.match_subdomains && rhs.match_subdomains)
275    return true;
276
277  size_t host_length = lhs.host.length();
278  size_t other_host_length = rhs.host.length();
279  if (host_length != other_host_length)
280    return host_length > other_host_length;
281
282  size_t path_length = lhs.path.length();
283  size_t other_path_length = rhs.path.length();
284  if (path_length != other_path_length)
285    return path_length > other_path_length;
286
287  if (lhs.allow && !rhs.allow)
288    return true;
289
290  return false;
291}
292
293URLBlacklistManager::URLBlacklistManager(PrefService* pref_service)
294    : ui_weak_ptr_factory_(this),
295      pref_service_(pref_service),
296      io_weak_ptr_factory_(this),
297      blacklist_(new URLBlacklist) {
298  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
299
300  pref_change_registrar_.Init(pref_service_);
301  base::Closure callback = base::Bind(&URLBlacklistManager::ScheduleUpdate,
302                                      base::Unretained(this));
303  pref_change_registrar_.Add(prefs::kUrlBlacklist, callback);
304  pref_change_registrar_.Add(prefs::kUrlWhitelist, callback);
305
306  // Start enforcing the policies without a delay when they are present at
307  // startup.
308  if (pref_service_->HasPrefPath(prefs::kUrlBlacklist))
309    Update();
310}
311
312void URLBlacklistManager::ShutdownOnUIThread() {
313  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
314  // Cancel any pending updates, and stop listening for pref change updates.
315  ui_weak_ptr_factory_.InvalidateWeakPtrs();
316  pref_change_registrar_.RemoveAll();
317}
318
319URLBlacklistManager::~URLBlacklistManager() {
320}
321
322void URLBlacklistManager::ScheduleUpdate() {
323  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
324  // Cancel pending updates, if any. This can happen if two preferences that
325  // change the blacklist are updated in one message loop cycle. In those cases,
326  // only rebuild the blacklist after all the preference updates are processed.
327  ui_weak_ptr_factory_.InvalidateWeakPtrs();
328  base::MessageLoop::current()->PostTask(
329      FROM_HERE,
330      base::Bind(&URLBlacklistManager::Update,
331                 ui_weak_ptr_factory_.GetWeakPtr()));
332}
333
334void URLBlacklistManager::Update() {
335  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
336
337  // The preferences can only be read on the UI thread.
338  scoped_ptr<base::ListValue> block(
339      pref_service_->GetList(prefs::kUrlBlacklist)->DeepCopy());
340  scoped_ptr<base::ListValue> allow(
341      pref_service_->GetList(prefs::kUrlWhitelist)->DeepCopy());
342
343  // Go through the IO thread to grab a WeakPtr to |this|. This is safe from
344  // here, since this task will always execute before a potential deletion of
345  // ProfileIOData on IO.
346  BrowserThread::PostTask(BrowserThread::IO, FROM_HERE,
347                          base::Bind(&URLBlacklistManager::UpdateOnIO,
348                                     base::Unretained(this),
349                                     base::Passed(&block),
350                                     base::Passed(&allow)));
351}
352
353void URLBlacklistManager::UpdateOnIO(scoped_ptr<base::ListValue> block,
354                                     scoped_ptr<base::ListValue> allow) {
355  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
356  // The URLBlacklist is built on the FILE thread. Once it's ready, it is passed
357  // to the URLBlacklistManager on IO.
358  BrowserThread::PostTaskAndReplyWithResult(
359      BrowserThread::FILE, FROM_HERE,
360      base::Bind(&BuildBlacklist,
361                 base::Passed(&block),
362                 base::Passed(&allow)),
363      base::Bind(&URLBlacklistManager::SetBlacklist,
364                 io_weak_ptr_factory_.GetWeakPtr()));
365}
366
367void URLBlacklistManager::SetBlacklist(scoped_ptr<URLBlacklist> blacklist) {
368  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
369  blacklist_ = blacklist.Pass();
370}
371
372bool URLBlacklistManager::IsURLBlocked(const GURL& url) const {
373  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
374  return blacklist_->IsURLBlocked(url);
375}
376
377bool URLBlacklistManager::IsRequestBlocked(
378    const net::URLRequest& request) const {
379  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
380  int filter_flags = net::LOAD_MAIN_FRAME | net::LOAD_SUB_FRAME;
381  if ((request.load_flags() & filter_flags) == 0)
382    return false;
383
384#if !defined(OS_CHROMEOS)
385  if (IsSigninFlowURL(request.url()))
386    return false;
387#endif
388
389  return IsURLBlocked(request.url());
390}
391
392// static
393void URLBlacklistManager::RegisterProfilePrefs(
394    user_prefs::PrefRegistrySyncable* registry) {
395  registry->RegisterListPref(prefs::kUrlBlacklist,
396                             user_prefs::PrefRegistrySyncable::UNSYNCABLE_PREF);
397  registry->RegisterListPref(prefs::kUrlWhitelist,
398                             user_prefs::PrefRegistrySyncable::UNSYNCABLE_PREF);
399}
400
401}  // namespace policy
402