1// Copyright (c) 2011 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4//
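// This file declares PhishingClassifierDelegate, which the RenderView uses to
// interact with a PhishingClassifier, and PhishingClassifierFilter, a
// render-process observer that receives phishing-related control messages.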
6
7#ifndef CHROME_RENDERER_SAFE_BROWSING_PHISHING_CLASSIFIER_DELEGATE_H_
8#define CHROME_RENDERER_SAFE_BROWSING_PHISHING_CLASSIFIER_DELEGATE_H_
9
10#include "base/memory/scoped_ptr.h"
11#include "base/strings/string16.h"
12#include "content/public/renderer/render_process_observer.h"
13#include "content/public/renderer/render_view_observer.h"
14#include "ui/base/page_transition_types.h"
15#include "url/gurl.h"
16
17namespace safe_browsing {
18class ClientPhishingRequest;
19class PhishingClassifier;
20class Scorer;
21
22class PhishingClassifierFilter : public content::RenderProcessObserver {
23 public:
24  static PhishingClassifierFilter* Create();
25  virtual ~PhishingClassifierFilter();
26
27  virtual bool OnControlMessageReceived(const IPC::Message& message) OVERRIDE;
28
29 private:
30  PhishingClassifierFilter();
31  void OnSetPhishingModel(const std::string& model);
32
33  DISALLOW_COPY_AND_ASSIGN(PhishingClassifierFilter);
34};
35
36class PhishingClassifierDelegate : public content::RenderViewObserver {
37 public:
38  // The RenderView owns us.  This object takes ownership of the classifier.
39  // Note that if classifier is null, a default instance of PhishingClassifier
40  // will be used.
41  static PhishingClassifierDelegate* Create(content::RenderView* render_view,
42                                            PhishingClassifier* classifier);
43  virtual ~PhishingClassifierDelegate();
44
45  // Called by the RenderView once there is a phishing scorer available.
46  // The scorer is passed on to the classifier.
47  void SetPhishingScorer(const safe_browsing::Scorer* scorer);
48
49  // Called by the RenderView once a page has finished loading.  Updates the
50  // last-loaded URL and page text, then starts classification if all other
51  // conditions are met (see MaybeStartClassification for details).
52  // We ignore preliminary captures, since these happen before the page has
53  // finished loading.
54  void PageCaptured(base::string16* page_text, bool preliminary_capture);
55
56  // RenderViewObserver implementation, public for testing.
57
58  // Called by the RenderView when a page has started loading in the given
59  // WebFrame.  Typically, this will cause any pending classification to be
60  // cancelled.  However, if the navigation is within the same page, we
61  // continue running the current classification.
62  virtual void DidCommitProvisionalLoad(blink::WebLocalFrame* frame,
63                                        bool is_new_navigation) OVERRIDE;
64
65 private:
66  friend class PhishingClassifierDelegateTest;
67
68  PhishingClassifierDelegate(content::RenderView* render_view,
69                             PhishingClassifier* classifier);
70
71  enum CancelClassificationReason {
72    NAVIGATE_AWAY,
73    NAVIGATE_WITHIN_PAGE,
74    PAGE_RECAPTURED,
75    SHUTDOWN,
76    NEW_PHISHING_SCORER,
77    CANCEL_CLASSIFICATION_MAX  // Always add new values before this one.
78  };
79
80  // Cancels any pending classification and frees the page text.
81  void CancelPendingClassification(CancelClassificationReason reason);
82
83  // RenderViewObserver implementation.
84  virtual bool OnMessageReceived(const IPC::Message& message) OVERRIDE;
85
86  // Called by the RenderView when it receives a StartPhishingDetection IPC
87  // from the browser.  This signals that it is ok to begin classification
88  // for the given toplevel URL.  If the URL has been fully loaded into the
89  // RenderView and a Scorer has been set, this will begin classification,
90  // otherwise classification will be deferred until these conditions are met.
91  void OnStartPhishingDetection(const GURL& url);
92
93  // Called when classification for the current page finishes.
94  void ClassificationDone(const ClientPhishingRequest& verdict);
95
96  // Returns the RenderView's toplevel URL.
97  GURL GetToplevelUrl();
98
99  // Shared code to begin classification if all conditions are met.
100  void MaybeStartClassification();
101
102  // The PhishingClassifier to use for the RenderView.  This is created once
103  // a scorer is made available via SetPhishingScorer().
104  scoped_ptr<PhishingClassifier> classifier_;
105
106  // The last URL that the browser instructed us to classify,
107  // with the ref stripped.
108  GURL last_url_received_from_browser_;
109
110  // The last top-level URL that has finished loading in the RenderView.
111  // This corresponds to the text in classifier_page_text_.
112  GURL last_finished_load_url_;
113
114  // The transition type for the last load in the main frame.  We use this
115  // to exclude back/forward loads from classification.  Note that this is
116  // set in DidCommitProvisionalLoad(); the transition is reset after this
117  // call in the RenderView, so we need to save off the value.
118  ui::PageTransition last_main_frame_transition_;
119
120  // The URL of the last load that we actually started classification on.
121  // This is used to suppress phishing classification on subframe navigation
122  // and back and forward navigations in history.
123  GURL last_url_sent_to_classifier_;
124
125  // The page text that will be analyzed by the phishing classifier.  This is
126  // set by OnNavigate and cleared when the classifier finishes.  Note that if
127  // there is no Scorer yet when OnNavigate is called, or the browser has not
128  // instructed us to classify the page, the page text will be cached until
129  // these conditions are met.
130  base::string16 classifier_page_text_;
131
132  // Tracks whether we have stored anything in classifier_page_text_ for the
133  // most recent load.  We use this to distinguish empty text from cases where
134  // PageCaptured has not been called.
135  bool have_page_text_;
136
137  // Set to true if the classifier is currently running.
138  bool is_classifying_;
139
140  DISALLOW_COPY_AND_ASSIGN(PhishingClassifierDelegate);
141};
142
143}  // namespace safe_browsing
144
145#endif  // CHROME_RENDERER_SAFE_BROWSING_PHISHING_CLASSIFIER_DELEGATE_H_
146