15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Copyright (c) 2012 The Chromium Authors. All rights reserved.
25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Use of this source code is governed by a BSD-style license that can be
35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// found in the LICENSE file.
45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
55821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Note that although this is not a "browser" test, it runs as part of
65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// browser_tests.  This is because WebKit does not work properly if it is
75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// shutdown and re-initialized.  Since browser_tests runs each test in a
85821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// new process, this avoids the problem.
95821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "chrome/renderer/safe_browsing/phishing_dom_feature_extractor.h"
115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/bind.h"
135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/callback.h"
14f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#include "base/command_line.h"
155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/compiler_specific.h"
165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/memory/weak_ptr.h"
179ab5563a3196760eb381d102cbb2bc0f7abc6a50Ben Murdoch#include "base/message_loop/message_loop.h"
18f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#include "base/strings/string_number_conversions.h"
19eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch#include "base/time/time.h"
20f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#include "chrome/browser/ui/browser.h"
21f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#include "chrome/browser/ui/tabs/tab_strip_model.h"
22f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#include "chrome/common/chrome_switches.h"
235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "chrome/renderer/safe_browsing/features.h"
245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "chrome/renderer/safe_browsing/mock_feature_extractor_clock.h"
255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "chrome/renderer/safe_browsing/test_utils.h"
26f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#include "chrome/test/base/in_process_browser_test.h"
27f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#include "chrome/test/base/ui_test_utils.h"
28f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#include "content/public/browser/interstitial_page.h"
29f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#include "content/public/browser/web_contents.h"
30f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#include "content/public/renderer/render_view.h"
31f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#include "content/public/test/browser_test_utils.h"
32f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#include "content/public/test/test_utils.h"
33f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#include "net/dns/mock_host_resolver.h"
34f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#include "net/test/embedded_test_server/embedded_test_server.h"
35f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#include "net/test/embedded_test_server/http_request.h"
36f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#include "net/test/embedded_test_server/http_response.h"
375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "testing/gmock/include/gmock/gmock.h"
38868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)#include "third_party/WebKit/public/platform/WebString.h"
397d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)#include "third_party/WebKit/public/web/WebFrame.h"
407d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)#include "third_party/WebKit/public/web/WebScriptSource.h"
41f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#include "third_party/WebKit/public/web/WebView.h"
425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)using ::testing::DoAll;
445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)using ::testing::Invoke;
455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)using ::testing::Return;
465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
namespace {

// Routing ID used to look up the RenderView under test via
// content::RenderView::FromRoutingID().
// The first RenderFrame is routing ID 1, and the first RenderView is 2.
// NOTE(review): this is a hard-coded assumption about routing ID allocation
// order; it must be revisited if that order changes.
const int kRenderViewRoutingId = 2;

}  // namespace
535d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)
545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace safe_browsing {
555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
56f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)class PhishingDOMFeatureExtractorTest : public InProcessBrowserTest {
575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) public:
58f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  content::WebContents* GetWebContents() {
59f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    return browser()->tab_strip_model()->GetActiveWebContents();
60f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  }
61f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Helper for the SubframeRemoval test that posts a message to remove
635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // the iframe "frame1" from the document.
645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void ScheduleRemoveIframe() {
65f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    base::MessageLoop::current()->PostTask(
665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        FROM_HERE,
675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        base::Bind(&PhishingDOMFeatureExtractorTest::RemoveIframe,
685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                   weak_factory_.GetWeakPtr()));
695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) protected:
72f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  PhishingDOMFeatureExtractorTest() : weak_factory_(this) {}
735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  virtual ~PhishingDOMFeatureExtractorTest() {}
755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
76f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  virtual void SetUpCommandLine(CommandLine* command_line) OVERRIDE {
77f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    command_line->AppendSwitch(switches::kSingleProcess);
785d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)#if defined(OS_WIN)
79f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    // Don't want to try to create a GPU process.
800529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch    command_line->AppendSwitch(switches::kDisableGpu);
81f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#endif
825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
84f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  virtual void SetUpOnMainThread() OVERRIDE {
85f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    extractor_.reset(new PhishingDOMFeatureExtractor(
865d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)        content::RenderView::FromRoutingID(kRenderViewRoutingId), &clock_));
87f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
88f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    ASSERT_TRUE(StartTestServer());
89f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    host_resolver()->AddRule("*", "127.0.0.1");
905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Runs the DOMFeatureExtractor on the RenderView, waiting for the
935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // completion callback.  Returns the success boolean from the callback.
945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool ExtractFeatures(FeatureMap* features) {
955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    success_ = false;
96f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    PostTaskToInProcessRendererAndWait(
97f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        base::Bind(&PhishingDOMFeatureExtractorTest::ExtractFeaturesInternal,
98f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        base::Unretained(this),
99f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        features));
100f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    return success_;
101f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  }
102f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
103f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  void ExtractFeaturesInternal(FeatureMap* features) {
104f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    scoped_refptr<content::MessageLoopRunner> message_loop =
105f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        new content::MessageLoopRunner;
1065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    extractor_->ExtractFeatures(
1075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        features,
1085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        base::Bind(&PhishingDOMFeatureExtractorTest::ExtractionDone,
109f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                   base::Unretained(this),
110f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                   message_loop->QuitClosure()));
111f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    message_loop->Run();
1125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Completion callback for feature extraction.
115f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  void ExtractionDone(const base::Closure& quit_closure,
116f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                      bool success) {
1175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    success_ = success;
118f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    quit_closure.Run();
1195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Does the actual work of removing the iframe "frame1" from the document.
1225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void RemoveIframe() {
1235d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)    content::RenderView* render_view =
1245d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)        content::RenderView::FromRoutingID(kRenderViewRoutingId);
1255d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)    blink::WebFrame* main_frame = render_view->GetWebView()->mainFrame();
1265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ASSERT_TRUE(main_frame);
1275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    main_frame->executeScript(
128f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        blink::WebString(
1295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            "document.body.removeChild(document.getElementById('frame1'));"));
1305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
132f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  bool StartTestServer() {
133f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    CHECK(!embedded_test_server_);
134f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    embedded_test_server_.reset(new net::test_server::EmbeddedTestServer());
135f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    embedded_test_server_->RegisterRequestHandler(
136f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        base::Bind(&PhishingDOMFeatureExtractorTest::HandleRequest,
137f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                   base::Unretained(this)));
138f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    return embedded_test_server_->InitializeAndWaitUntilReady();
139f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  }
140f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
141f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  scoped_ptr<net::test_server::HttpResponse> HandleRequest(
142f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      const net::test_server::HttpRequest& request) {
143f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    std::map<std::string, std::string>::const_iterator host_it =
144f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        request.headers.find("Host");
145f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    if (host_it == request.headers.end())
146f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      return scoped_ptr<net::test_server::HttpResponse>();
147f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
148f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    std::string url =
149f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        std::string("http://") + host_it->second + request.relative_url;
150f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    std::map<std::string, std::string>::const_iterator it =
151f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        responses_.find(url);
152f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    if (it == responses_.end())
153f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      return scoped_ptr<net::test_server::HttpResponse>();
154f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
155f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    scoped_ptr<net::test_server::BasicHttpResponse> http_response(
156f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        new net::test_server::BasicHttpResponse());
157f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    http_response->set_code(net::HTTP_OK);
158f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    http_response->set_content_type("text/html");
159f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    http_response->set_content(it->second);
160f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    return http_response.PassAs<net::test_server::HttpResponse>();
161f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  }
162f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
163f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  GURL GetURL(const std::string& host, const std::string& path) {
164f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    GURL::Replacements replace;
165f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    replace.SetHostStr(host);
166f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    replace.SetPathStr(path);
167f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    return embedded_test_server_->base_url().ReplaceComponents(replace);
168f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  }
169f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
170f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  // Returns the URL that was loaded.
171f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  GURL LoadHtml(const std::string& host, const std::string& content) {
172f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    GURL url(GetURL(host, ""));
173f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    responses_[url.spec()] = content;
174f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    ui_test_utils::NavigateToURL(browser(), url);
175f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    return url;
176f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  }
177f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
178f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  // Map of url -> response body for network requests from the renderer.
179f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  // Any urls not in this map are served a 404 error.
180f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  std::map<std::string, std::string> responses_;
181f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
182f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  scoped_ptr<net::test_server::EmbeddedTestServer> embedded_test_server_;
1835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  MockFeatureExtractorClock clock_;
1845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  scoped_ptr<PhishingDOMFeatureExtractor> extractor_;
1855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool success_;  // holds the success value from ExtractFeatures
1865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  base::WeakPtrFactory<PhishingDOMFeatureExtractorTest> weak_factory_;
1875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)};
1885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
189f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)IN_PROC_BROWSER_TEST_F(PhishingDOMFeatureExtractorTest, FormFeatures) {
1905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // This test doesn't exercise the extraction timing.
1915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_CALL(clock_, Now()).WillRepeatedly(Return(base::TimeTicks::Now()));
1925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  FeatureMap expected_features;
1945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  expected_features.AddBooleanFeature(features::kPageHasForms);
1955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  expected_features.AddRealFeature(features::kPageActionOtherDomainFreq, 0.25);
1965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  expected_features.AddBooleanFeature(features::kPageHasTextInputs);
1975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  expected_features.AddBooleanFeature(features::kPageHasCheckInputs);
1985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  FeatureMap features;
200f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  LoadHtml(
201f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      "host.com",
202f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      "<html><head><body>"
203f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      "<form action=\"query\"><input type=text><input type=checkbox></form>"
204f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      "<form action=\"http://cgi.host.com/submit\"></form>"
205f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      "<form action=\"http://other.com/\"></form>"
206f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      "<form action=\"query\"></form>"
207f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      "<form></form></body></html>");
2085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ASSERT_TRUE(ExtractFeatures(&features));
2095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ExpectFeatureMapsAreEqual(features, expected_features);
2105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  expected_features.Clear();
2125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  expected_features.AddBooleanFeature(features::kPageHasRadioInputs);
2135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  expected_features.AddBooleanFeature(features::kPageHasPswdInputs);
2145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  features.Clear();
216f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  LoadHtml(
217f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      "host.com",
218f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      "<html><head><body>"
219f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      "<input type=\"radio\"><input type=password></body></html>");
2205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ASSERT_TRUE(ExtractFeatures(&features));
2215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ExpectFeatureMapsAreEqual(features, expected_features);
2225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  expected_features.Clear();
2245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  expected_features.AddBooleanFeature(features::kPageHasTextInputs);
2255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  features.Clear();
227f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  LoadHtml(
228f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      "host.com",
229f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      "<html><head><body><input></body></html>");
2305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ASSERT_TRUE(ExtractFeatures(&features));
2315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ExpectFeatureMapsAreEqual(features, expected_features);
2325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  expected_features.Clear();
2345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  expected_features.AddBooleanFeature(features::kPageHasTextInputs);
2355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  features.Clear();
237f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  LoadHtml(
238f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      "host.com",
239f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      "<html><head><body><input type=\"invalid\"></body></html>");
2405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ASSERT_TRUE(ExtractFeatures(&features));
2415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ExpectFeatureMapsAreEqual(features, expected_features);
2425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
2435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
244f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)IN_PROC_BROWSER_TEST_F(PhishingDOMFeatureExtractorTest, LinkFeatures) {
2455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // This test doesn't exercise the extraction timing.
2465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_CALL(clock_, Now()).WillRepeatedly(Return(base::TimeTicks::Now()));
2475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  FeatureMap expected_features;
2495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  expected_features.AddRealFeature(features::kPageExternalLinksFreq, 0.5);
2505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  expected_features.AddRealFeature(features::kPageSecureLinksFreq, 0.0);
2515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  expected_features.AddBooleanFeature(features::kPageLinkDomain +
2525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                      std::string("chromium.org"));
2535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  FeatureMap features;
255f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  LoadHtml(
256f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      "www.host.com",
257f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      "<html><head><body>"
258f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      "<a href=\"http://www2.host.com/abc\">link</a>"
259f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      "<a name=page_anchor></a>"
260f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      "<a href=\"http://www.chromium.org/\">chromium</a>"
261f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      "</body></html");
2625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ASSERT_TRUE(ExtractFeatures(&features));
2635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ExpectFeatureMapsAreEqual(features, expected_features);
2645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  expected_features.Clear();
2665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  expected_features.AddRealFeature(features::kPageExternalLinksFreq, 0.25);
2675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  expected_features.AddRealFeature(features::kPageSecureLinksFreq, 0.5);
2685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  expected_features.AddBooleanFeature(features::kPageLinkDomain +
2695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                      std::string("chromium.org"));
2705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
271f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  net::SpawnedTestServer https_server(
272f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      net::SpawnedTestServer::TYPE_HTTPS,
273f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      net::SpawnedTestServer::kLocalhost,
274f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      base::FilePath(FILE_PATH_LITERAL("chrome/test/data")));
275f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  ASSERT_TRUE(https_server.Start());
276f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
277f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  // The PhishingDOMFeatureExtractor depends on URLs being domains and not IPs,
278f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  // so use a domain.
279f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  std::string url_str = "https://host.com:";
280f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  url_str += base::IntToString(https_server.host_port_pair().port());
281f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  url_str += "/files/safe_browsing/secure_link_features.html";
282f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  ui_test_utils::NavigateToURL(browser(), GURL(url_str));
283f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
284f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  // Click through the certificate error interstitial.
285f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  content::InterstitialPage* interstitial_page =
286f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      GetWebContents()->GetInterstitialPage();
287f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  interstitial_page->Proceed();
288f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  content::WaitForLoadStop(GetWebContents());
289f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
2905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  features.Clear();
2915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ASSERT_TRUE(ExtractFeatures(&features));
2925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ExpectFeatureMapsAreEqual(features, expected_features);
2935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
2945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
295f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)IN_PROC_BROWSER_TEST_F(PhishingDOMFeatureExtractorTest,
296f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                       ScriptAndImageFeatures) {
2975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // This test doesn't exercise the extraction timing.
2985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_CALL(clock_, Now()).WillRepeatedly(Return(base::TimeTicks::Now()));
2995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  FeatureMap expected_features;
3015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  expected_features.AddBooleanFeature(features::kPageNumScriptTagsGTOne);
3025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  FeatureMap features;
304f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  LoadHtml(
305f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      "host.com",
306f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      "<html><head><script></script><script></script></head></html>");
3075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ASSERT_TRUE(ExtractFeatures(&features));
3085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ExpectFeatureMapsAreEqual(features, expected_features);
3095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  expected_features.Clear();
3115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  expected_features.AddBooleanFeature(features::kPageNumScriptTagsGTOne);
3125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  expected_features.AddBooleanFeature(features::kPageNumScriptTagsGTSix);
3135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  expected_features.AddRealFeature(features::kPageImgOtherDomainFreq, 0.5);
3145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  features.Clear();
316f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  net::SpawnedTestServer https_server(
317f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      net::SpawnedTestServer::TYPE_HTTPS,
318f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      net::SpawnedTestServer::kLocalhost,
319f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      base::FilePath(FILE_PATH_LITERAL("chrome/test/data")));
320f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  ASSERT_TRUE(https_server.Start());
321f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
322f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  // The PhishingDOMFeatureExtractor depends on URLs being domains and not IPs,
323f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  // so use a domain.
324f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  std::string url_str = "https://host.com:";
325f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  url_str += base::IntToString(https_server.host_port_pair().port());
326f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  url_str += "/files/safe_browsing/secure_script_and_image.html";
327f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  ui_test_utils::NavigateToURL(browser(), GURL(url_str));
328f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
329f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  // Click through the certificate error interstitial.
330f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  content::InterstitialPage* interstitial_page =
331f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      GetWebContents()->GetInterstitialPage();
332f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  interstitial_page->Proceed();
333f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  content::WaitForLoadStop(GetWebContents());
334f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
3355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ASSERT_TRUE(ExtractFeatures(&features));
3365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ExpectFeatureMapsAreEqual(features, expected_features);
3375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
3385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
// Verifies that feature extraction aggregates features from the top-level
// document and from the documents loaded in its iframes, including an iframe
// nested inside another iframe.
339f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)IN_PROC_BROWSER_TEST_F(PhishingDOMFeatureExtractorTest, SubFrames) {
3405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // This test doesn't exercise the extraction timing.
3415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_CALL(clock_, Now()).WillRepeatedly(Return(base::TimeTicks::Now()));
3425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Test that features are aggregated across all frames.
3445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
  // The iframe URLs built further down carry the embedded test server's port,
  // so fetch it up front.
345f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  std::string port = base::IntToString(embedded_test_server_->port());
  // Register the body of each subframe document under its URL spec.
  // NOTE(review): presumably these are what the embedded test server returns
  // for those URLs; confirm against the fixture's request handler.
  // host2.com: one script, two forms (one posting to host4.com), a checkbox,
  // one link, and a nested iframe.
346f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  responses_[GetURL("host2.com", "").spec()] =
3475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      "<html><head><script></script><body>"
3485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      "<form action=\"http://host4.com/\"><input type=checkbox></form>"
3495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      "<form action=\"http://host2.com/submit\"></form>"
3505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      "<a href=\"http://www.host2.com/home\">link</a>"
3515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      "<iframe src=\"nested.html\"></iframe>"
3525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      "<body></html>";
3535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
  // nested.html (inside the host2.com frame): a password input and two links,
  // one of them an https link to host4.com.
354f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  responses_[GetURL("host2.com", "nested.html").spec()] =
3555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      "<html><body><input type=password>"
3565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      "<a href=\"https://host4.com/\">link</a>"
3575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      "<a href=\"relative\">another</a>"
3585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      "</body></html>";
3595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
  // host3.com: one script and one image whose source is on host.com.
360f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  responses_[GetURL("host3.com", "").spec()] =
3615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      "<html><head><script></script><body>"
3625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      "<img src=\"http://host.com/123.png\">"
3635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      "</body></html>";
3645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  FeatureMap expected_features;
3665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  expected_features.AddBooleanFeature(features::kPageHasForms);
3675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Form action domains are compared to the URL of the document they're in,
3685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // not the URL of the toplevel page.  So http://host2.com/ has two form
3695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // actions, one of which is external.
3705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  expected_features.AddRealFeature(features::kPageActionOtherDomainFreq, 0.5);
  // The three input kinds come from three different documents: the text input
  // from the top-level page, the password input from nested.html and the
  // checkbox from the host2.com frame.
3715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  expected_features.AddBooleanFeature(features::kPageHasTextInputs);
3725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  expected_features.AddBooleanFeature(features::kPageHasPswdInputs);
3735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  expected_features.AddBooleanFeature(features::kPageHasCheckInputs);
  // There are four <a> links across all frames.  Only https://host4.com/ is
  // expected to count as external and as secure, hence 1/4 for both
  // frequencies and a single link-domain feature for host4.com.
3745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  expected_features.AddRealFeature(features::kPageExternalLinksFreq, 0.25);
3755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  expected_features.AddBooleanFeature(features::kPageLinkDomain +
3765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                      std::string("host4.com"));
3775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  expected_features.AddRealFeature(features::kPageSecureLinksFreq, 0.25);
  // The host2.com and host3.com documents contribute one <script> tag each.
3785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  expected_features.AddBooleanFeature(features::kPageNumScriptTagsGTOne);
  // The only image lives in the host3.com frame and is served from host.com.
3795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  expected_features.AddRealFeature(features::kPageImgOtherDomainFreq, 1.0);
3805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  FeatureMap features;
  // Top-level page: a text input, a relative link, and one iframe each for
  // host2.com and host3.com, both addressed on the test server's port.
382f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  std::string html(
383f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      "<html><body><input type=text><a href=\"info.html\">link</a>"
384f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      "<iframe src=\"http://host2.com:");
385f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  html += port;
386f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  html += std::string(
387f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      "/\"></iframe>"
388f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      "<iframe src=\"http://host3.com:");
389f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  html += port;
390f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  html += std::string("/\"></iframe></body></html>");
391f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
392f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  LoadHtml("host.com", html);
3935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ASSERT_TRUE(ExtractFeatures(&features));
3945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ExpectFeatureMapsAreEqual(features, expected_features);
3955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
3965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
// Verifies chunked extraction driven by the mock clock.  The first half lets
// individual chunks overrun so that continuation tasks are needed and expects
// extraction to succeed with complete features; the second half advances the
// clock far enough that the whole extraction is expected to fail.
397f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)IN_PROC_BROWSER_TEST_F(PhishingDOMFeatureExtractorTest, Continuation) {
3985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // For this test, we'll cause the feature extraction to run multiple
3995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // iterations by incrementing the clock.
4005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // This page has a total of 50 elements.  For the external forms feature to
4025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // be computed correctly, the extractor has to examine the whole document.
4035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Note: the empty HEAD is important -- WebKit will synthesize a HEAD if
4045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // there isn't one present, which can be confusing for the element counts.
  // Element tally: html + head + body + form + 45 p + form = 50.  The two
  // forms sit at opposite ends of the body, one on-domain and one pointing at
  // host2.com.
4055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  std::string response = "<html><head></head><body>"
4065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      "<form action=\"ondomain\"></form>";
4075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (int i = 0; i < 45; ++i) {
4085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    response.append("<p>");
4095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
4105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  response.append("<form action=\"http://host2.com/\"></form></body></html>");
4115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Advance the clock 6 ms every 10 elements processed, 10 ms between chunks.
4135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Note that this assumes kClockCheckGranularity = 10 and
4145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // kMaxTimePerChunkMs = 10.
  // Each WillOnce below answers exactly one Now() call, in order, so the
  // sequence also pins down how many times the extractor reads the clock.
4155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  base::TimeTicks now = base::TimeTicks::Now();
4165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_CALL(clock_, Now())
4175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // Time check at the start of extraction.
4185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      .WillOnce(Return(now))
4195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // Time check at the start of the first chunk of work.
4205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      .WillOnce(Return(now))
4215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // Time check after the first 10 elements.
4225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(6)))
4235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // Time check after the next 10 elements.  This is over the chunk
4245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // time limit, so a continuation task will be posted.
4255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(12)))
4265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // Time check at the start of the second chunk of work.
4275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(22)))
4285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // Time check after resuming iteration for the second chunk.
4295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(24)))
4305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // Time check after the next 10 elements.
4315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(30)))
4325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // Time check after the next 10 elements.  This will trigger another
4335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // continuation task.
4345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(36)))
4355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // Time check at the start of the third chunk of work.
4365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(46)))
4375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // Time check after resuming iteration for the third chunk.
4385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(48)))
4395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // Time check after the last 10 elements.
4405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(54)))
4415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // A final time check for the histograms.
4425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(56)));
4435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
  // One of the two forms posts off-domain, hence the 0.5 frequency.  Because
  // that form is the very last element, the value can only be right if all
  // three chunks ran.
4445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  FeatureMap expected_features;
4455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  expected_features.AddBooleanFeature(features::kPageHasForms);
4465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  expected_features.AddRealFeature(features::kPageActionOtherDomainFreq, 0.5);
4475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  FeatureMap features;
449f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  LoadHtml("host.com", response);
4505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ASSERT_TRUE(ExtractFeatures(&features));
4515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ExpectFeatureMapsAreEqual(features, expected_features);
4525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Make sure none of the mock expectations carry over to the next test.
  // The call also verifies the expectations set above before clearing them,
  // so the second EXPECT_CALL sequence below starts from a clean mock.
4535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ::testing::Mock::VerifyAndClearExpectations(&clock_);
4545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Now repeat the test with the same page, but advance the clock faster so
4565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // that the extraction time exceeds the maximum total time for the feature
4575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // extractor.  Extraction should fail.  Note that this assumes
4585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // kMaxTotalTimeMs = 500.
4595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_CALL(clock_, Now())
4605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // Time check at the start of extraction.
4615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      .WillOnce(Return(now))
4625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // Time check at the start of the first chunk of work.
4635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      .WillOnce(Return(now))
4645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // Time check after the first 10 elements.
4655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(300)))
4665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // Time check at the start of the second chunk of work.
4675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(350)))
4685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // Time check after resuming iteration for the second chunk.
4695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(360)))
4705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // Time check after the next 10 elements.  This is over the limit.
4715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(600)))
4725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // A final time check for the histograms.
4735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(620)));
4745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
  // LoadHtml is deliberately not called again: this extraction runs against
  // the page loaded above, and only the clock behaviour differs.
4755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  features.Clear();
4765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_FALSE(ExtractFeatures(&features));
4775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
4785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
// Verifies that extraction still succeeds, and still reports features from
// the iframe's document, when that iframe is removed from the page while the
// extractor is paused between chunks.
479f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)IN_PROC_BROWSER_TEST_F(PhishingDOMFeatureExtractorTest, SubframeRemoval) {
4805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // In this test, we'll advance the feature extractor so that it is positioned
4815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // inside an iframe, and have it pause due to exceeding the chunk time limit.
4825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Then, prior to continuation, the iframe is removed from the document.
4835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // As currently implemented, this should finish extraction from the removed
4845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // iframe document.
  // Body of the iframe document.  It holds the only password input in this
  // test, which the expectations further down rely on.
485f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  responses_[GetURL("host.com", "frame.html").spec()] =
4865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      "<html><body><p><p><p><input type=password></body></html>";
4875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  base::TimeTicks now = base::TimeTicks::Now();
4895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  EXPECT_CALL(clock_, Now())
4905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // Time check at the start of extraction.
4915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      .WillOnce(Return(now))
4925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // Time check at the start of the first chunk of work.
4935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      .WillOnce(Return(now))
4945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // Time check after the first 10 elements.  Enough time has passed
4955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // to stop extraction.  Schedule the iframe removal to happen as soon as
4965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // the feature extractor returns control to the message loop.
      // DoAll performs its actions in order and yields the result of the last
      // one, so the removal is scheduled first and the advanced time is what
      // Now() returns.
4975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      .WillOnce(DoAll(
4985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          Invoke(this, &PhishingDOMFeatureExtractorTest::ScheduleRemoveIframe),
4995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          Return(now + base::TimeDelta::FromMilliseconds(21))))
5005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // Time check at the start of the second chunk of work.
5015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(25)))
5025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // Time check after resuming iteration for the second chunk.
5035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(27)))
5045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // A final time check for the histograms.
5055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      .WillOnce(Return(now + base::TimeDelta::FromMilliseconds(33)));
5065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
  // The <form> is in the top-level page, after the iframe.  The password
  // input exists only in frame.html, so expecting kPageHasPswdInputs checks
  // that the removed iframe's document was still processed.
5075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  FeatureMap expected_features;
5085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  expected_features.AddBooleanFeature(features::kPageHasForms);
5095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  expected_features.AddBooleanFeature(features::kPageHasPswdInputs);
5105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  FeatureMap features;
  // The iframe carries id "frame1".
  // NOTE(review): presumably ScheduleRemoveIframe looks the iframe up by that
  // id; confirm against the fixture.
512f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  LoadHtml(
513f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      "host.com",
514f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      "<html><head></head><body>"
515f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      "<iframe src=\"frame.html\" id=\"frame1\"></iframe>"
516f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      "<form></form></body></html>");
5175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ASSERT_TRUE(ExtractFeatures(&features));
5185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ExpectFeatureMapsAreEqual(features, expected_features);
5195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
5205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}  // namespace safe_browsing
522