1// Copyright 2013 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "chrome/browser/extensions/activity_log/uma_policy.h"
6
7#include "base/metrics/histogram.h"
8#include "base/strings/stringprintf.h"
9#include "chrome/browser/browser_process.h"
10#include "chrome/browser/extensions/active_script_controller.h"
11#include "chrome/browser/extensions/activity_log/activity_action_constants.h"
12#include "chrome/browser/extensions/activity_log/ad_network_database.h"
13#include "chrome/browser/sessions/session_id.h"
14#include "chrome/browser/ui/browser.h"
15#include "chrome/browser/ui/browser_list.h"
16#include "chrome/browser/ui/tabs/tab_strip_model.h"
17#include "chrome/common/url_constants.h"
18#include "content/public/browser/web_contents.h"
19#include "content/public/common/url_constants.h"
20#include "extensions/browser/extension_registry.h"
21#include "extensions/common/dom_action_types.h"
22#include "extensions/common/extension.h"
23#include "extensions/common/manifest.h"
24
25namespace extensions {
26
27namespace {
28
29// For convenience.
30const int kNoStatus           = UmaPolicy::NONE;
31const int kContentScript      = 1 << UmaPolicy::CONTENT_SCRIPT;
32const int kReadDom            = 1 << UmaPolicy::READ_DOM;
33const int kModifiedDom        = 1 << UmaPolicy::MODIFIED_DOM;
34const int kDomMethod          = 1 << UmaPolicy::DOM_METHOD;
35const int kDocumentWrite      = 1 << UmaPolicy::DOCUMENT_WRITE;
36const int kInnerHtml          = 1 << UmaPolicy::INNER_HTML;
37const int kCreatedScript      = 1 << UmaPolicy::CREATED_SCRIPT;
38const int kCreatedIframe      = 1 << UmaPolicy::CREATED_IFRAME;
39const int kCreatedDiv         = 1 << UmaPolicy::CREATED_DIV;
40const int kCreatedLink        = 1 << UmaPolicy::CREATED_LINK;
41const int kCreatedInput       = 1 << UmaPolicy::CREATED_INPUT;
42const int kCreatedEmbed       = 1 << UmaPolicy::CREATED_EMBED;
43const int kCreatedObject      = 1 << UmaPolicy::CREATED_OBJECT;
44const int kAdInjected         = 1 << UmaPolicy::AD_INJECTED;
45const int kAdRemoved          = 1 << UmaPolicy::AD_REMOVED;
46const int kAdReplaced         = 1 << UmaPolicy::AD_REPLACED;
47const int kAdLikelyInjected   = 1 << UmaPolicy::AD_LIKELY_INJECTED;
48const int kAdLikelyReplaced   = 1 << UmaPolicy::AD_LIKELY_REPLACED;
49
50// A mask of all the ad injection flags.
51const int kAnyAdActivity = kAdInjected |
52                           kAdRemoved |
53                           kAdReplaced |
54                           kAdLikelyInjected |
55                           kAdLikelyReplaced;
56
57}  // namespace
58
59// Class constants, also used in testing. --------------------------------------
60
61const char UmaPolicy::kNumberOfTabs[]       = "num_tabs";
62const size_t UmaPolicy::kMaxTabsTracked     = 50;
63
64// Setup and shutdown. ---------------------------------------------------------
65
66UmaPolicy::UmaPolicy(Profile* profile)
67    : ActivityLogPolicy(profile), profile_(profile) {
68  DCHECK(!profile->IsOffTheRecord());
69  BrowserList::AddObserver(this);
70}
71
72UmaPolicy::~UmaPolicy() {
73  BrowserList::RemoveObserver(this);
74}
75
76// Unlike the other policies, UmaPolicy can commit suicide directly because it
77// doesn't have a dependency on a database.
78void UmaPolicy::Close() {
79  delete this;
80}
81
82// Process actions. ------------------------------------------------------------
83
84void UmaPolicy::ProcessAction(scoped_refptr<Action> action) {
85  if (!action->page_url().is_valid() && !action->arg_url().is_valid())
86    return;
87  if (action->page_incognito() || action->arg_incognito())
88    return;
89  std::string url;
90  int status = MatchActionToStatus(action);
91  if (action->page_url().is_valid()) {
92    url = CleanURL(action->page_url());
93  } else if (status & kContentScript) {
94    // This is for the tabs.executeScript case.
95    url = CleanURL(action->arg_url());
96  }
97  if (url.empty())
98    return;
99
100  SiteMap::iterator site_lookup = url_status_.find(url);
101  if (site_lookup != url_status_.end())
102    site_lookup->second[action->extension_id()] |= status;
103}
104
105int UmaPolicy::MatchActionToStatus(scoped_refptr<Action> action) {
106  if (action->action_type() == Action::ACTION_CONTENT_SCRIPT) {
107    return kContentScript;
108  } else if (action->action_type() == Action::ACTION_API_CALL &&
109             action->api_name() == "tabs.executeScript") {
110    return kContentScript;
111  } else if (action->action_type() != Action::ACTION_DOM_ACCESS) {
112    return kNoStatus;
113  }
114
115  int dom_verb;
116  if (!action->other() ||
117      !action->other()->GetIntegerWithoutPathExpansion(
118          activity_log_constants::kActionDomVerb, &dom_verb)) {
119    return kNoStatus;
120  }
121
122  int ret_bit = kNoStatus;
123  DomActionType::Type dom_type = static_cast<DomActionType::Type>(dom_verb);
124  if (dom_type == DomActionType::GETTER)
125    return kReadDom;
126  if (dom_type == DomActionType::SETTER) {
127    ret_bit |= kModifiedDom;
128  } else if (dom_type == DomActionType::METHOD) {
129    ret_bit |= kDomMethod;
130  } else {
131    return kNoStatus;
132  }
133
134  if (action->api_name() == "HTMLDocument.write" ||
135      action->api_name() == "HTMLDocument.writeln") {
136    ret_bit |= kDocumentWrite;
137  } else if (action->api_name() == "Element.innerHTML") {
138    ret_bit |= kInnerHtml;
139  } else if (action->api_name() == "Document.createElement") {
140    std::string arg;
141    action->args()->GetString(0, &arg);
142    if (arg == "script") {
143      ret_bit |= kCreatedScript;
144    } else if (arg == "iframe") {
145      ret_bit |= kCreatedIframe;
146    } else if (arg == "div") {
147      ret_bit |= kCreatedDiv;
148    } else if (arg == "a") {
149      ret_bit |= kCreatedLink;
150    } else if (arg == "input") {
151      ret_bit |= kCreatedInput;
152    } else if (arg == "embed") {
153      ret_bit |= kCreatedEmbed;
154    } else if (arg == "object") {
155      ret_bit |= kCreatedObject;
156    }
157  }
158
159  const Action::InjectionType ad_injection =
160      action->DidInjectAd(g_browser_process->rappor_service());
161  switch (ad_injection) {
162    case Action::INJECTION_NEW_AD:
163      ret_bit |= kAdInjected;
164      break;
165    case Action::INJECTION_REMOVED_AD:
166      ret_bit |= kAdRemoved;
167      break;
168    case Action::INJECTION_REPLACED_AD:
169      ret_bit |= kAdReplaced;
170      break;
171    case Action::INJECTION_LIKELY_NEW_AD:
172      ret_bit |= kAdLikelyInjected;
173      break;
174    case Action::INJECTION_LIKELY_REPLACED_AD:
175      ret_bit |= kAdLikelyReplaced;
176      break;
177    case Action::NO_AD_INJECTION:
178      break;
179    case Action::NUM_INJECTION_TYPES:
180      NOTREACHED();
181  }
182
183  return ret_bit;
184}
185
186void UmaPolicy::HistogramOnClose(const std::string& cleaned_url,
187                                 content::WebContents* web_contents) {
188  // Let's try to avoid histogramming useless URLs.
189  if (cleaned_url.empty() || cleaned_url == url::kAboutBlankURL ||
190      cleaned_url == chrome::kChromeUINewTabURL)
191    return;
192
193  int statuses[MAX_STATUS - 1];
194  std::memset(statuses, 0, sizeof(statuses));
195
196  ActiveScriptController* active_script_controller =
197      ActiveScriptController::GetForWebContents(web_contents);
198  SiteMap::iterator site_lookup = url_status_.find(cleaned_url);
199  const ExtensionMap& exts = site_lookup->second;
200  std::set<std::string> ad_injectors;
201  for (ExtensionMap::const_iterator ext_iter = exts.begin();
202       ext_iter != exts.end();
203       ++ext_iter) {
204    if (ext_iter->first == kNumberOfTabs)
205      continue;
206    for (int i = NONE + 1; i < MAX_STATUS; ++i) {
207      if (ext_iter->second & (1 << i))
208        statuses[i-1]++;
209    }
210
211    if (ext_iter->second & kAnyAdActivity)
212      ad_injectors.insert(ext_iter->first);
213  }
214  if (active_script_controller)
215    active_script_controller->OnAdInjectionDetected(ad_injectors);
216
217  ExtensionRegistry* registry = ExtensionRegistry::Get(profile_);
218  for (std::set<std::string>::const_iterator iter = ad_injectors.begin();
219       iter != ad_injectors.end();
220       ++iter) {
221    const Extension* extension =
222        registry->GetExtensionById(*iter, ExtensionRegistry::EVERYTHING);
223    if (extension) {
224      UMA_HISTOGRAM_ENUMERATION("Extensions.AdInjection.InstallLocation",
225                                extension->location(),
226                                Manifest::NUM_LOCATIONS);
227    }
228  }
229
230  std::string prefix = "ExtensionActivity.";
231  if (GURL(cleaned_url).host() != "www.google.com") {
232    UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CONTENT_SCRIPT),
233                             statuses[CONTENT_SCRIPT - 1]);
234    UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(READ_DOM),
235                             statuses[READ_DOM - 1]);
236    UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(MODIFIED_DOM),
237                             statuses[MODIFIED_DOM - 1]);
238    UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(DOM_METHOD),
239                             statuses[DOM_METHOD - 1]);
240    UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(DOCUMENT_WRITE),
241                             statuses[DOCUMENT_WRITE - 1]);
242    UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(INNER_HTML),
243                             statuses[INNER_HTML - 1]);
244    UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_SCRIPT),
245                             statuses[CREATED_SCRIPT - 1]);
246    UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_IFRAME),
247                             statuses[CREATED_IFRAME - 1]);
248    UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_DIV),
249                             statuses[CREATED_DIV - 1]);
250    UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_LINK),
251                             statuses[CREATED_LINK - 1]);
252    UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_INPUT),
253                             statuses[CREATED_INPUT - 1]);
254    UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_EMBED),
255                             statuses[CREATED_EMBED - 1]);
256    UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_OBJECT),
257                             statuses[CREATED_OBJECT - 1]);
258    UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(AD_INJECTED),
259                             statuses[AD_INJECTED - 1]);
260    UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(AD_REMOVED),
261                             statuses[AD_REMOVED - 1]);
262    UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(AD_REPLACED),
263                             statuses[AD_REPLACED - 1]);
264    UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(AD_LIKELY_INJECTED),
265                             statuses[AD_LIKELY_INJECTED - 1]);
266    UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(AD_LIKELY_REPLACED),
267                             statuses[AD_LIKELY_REPLACED - 1]);
268  } else {
269    prefix += "Google.";
270    UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CONTENT_SCRIPT),
271                             statuses[CONTENT_SCRIPT - 1]);
272    UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(READ_DOM),
273                             statuses[READ_DOM - 1]);
274    UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(MODIFIED_DOM),
275                             statuses[MODIFIED_DOM - 1]);
276    UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(DOM_METHOD),
277                             statuses[DOM_METHOD - 1]);
278    UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(DOCUMENT_WRITE),
279                             statuses[DOCUMENT_WRITE - 1]);
280    UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(INNER_HTML),
281                             statuses[INNER_HTML - 1]);
282    UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_SCRIPT),
283                             statuses[CREATED_SCRIPT - 1]);
284    UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_IFRAME),
285                             statuses[CREATED_IFRAME - 1]);
286    UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_DIV),
287                             statuses[CREATED_DIV - 1]);
288    UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_LINK),
289                             statuses[CREATED_LINK - 1]);
290    UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_INPUT),
291                             statuses[CREATED_INPUT - 1]);
292    UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_EMBED),
293                             statuses[CREATED_EMBED - 1]);
294    UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CREATED_OBJECT),
295                             statuses[CREATED_OBJECT - 1]);
296    UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(AD_INJECTED),
297                             statuses[AD_INJECTED - 1]);
298    UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(AD_REMOVED),
299                             statuses[AD_REMOVED - 1]);
300    UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(AD_REPLACED),
301                             statuses[AD_REPLACED - 1]);
302    UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(AD_LIKELY_INJECTED),
303                             statuses[AD_LIKELY_INJECTED - 1]);
304    UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(AD_LIKELY_REPLACED),
305                             statuses[AD_LIKELY_REPLACED - 1]);
306  }
307}
308
309// Handle tab tracking. --------------------------------------------------------
310
311void UmaPolicy::OnBrowserAdded(Browser* browser) {
312  if (!profile_->IsSameProfile(browser->profile()))
313    return;
314  browser->tab_strip_model()->AddObserver(this);
315}
316
317void UmaPolicy::OnBrowserRemoved(Browser* browser) {
318  if (!profile_->IsSameProfile(browser->profile()))
319    return;
320  browser->tab_strip_model()->RemoveObserver(this);
321}
322
323// Use the value from SessionID::IdForTab, *not* |index|. |index| will be
324// duplicated across tabs in a session, whereas IdForTab uniquely identifies
325// each tab.
326void UmaPolicy::TabChangedAt(content::WebContents* contents,
327                             int index,
328                             TabChangeType change_type) {
329  if (change_type != TabStripModelObserver::LOADING_ONLY)
330    return;
331  if (!contents)
332    return;
333
334  std::string url = CleanURL(contents->GetLastCommittedURL());
335  int32 tab_id = SessionID::IdForTab(contents);
336
337  std::map<int32, std::string>::iterator tab_it = tab_list_.find(tab_id);
338
339  // Ignore tabs that haven't changed status.
340  if (tab_it != tab_list_.end() && tab_it->second == url)
341    return;
342
343  // Is this an existing tab whose URL has changed.
344  if (tab_it != tab_list_.end()) {
345    CleanupClosedPage(tab_it->second, contents);
346    tab_list_.erase(tab_id);
347  }
348
349  // Check that tab_list_ isn't over the kMaxTabsTracked budget.
350  if (tab_list_.size() >= kMaxTabsTracked)
351    return;
352
353  // Set up the new entries.
354  tab_list_[tab_id] = url;
355  SetupOpenedPage(url);
356}
357
358// Use the value from SessionID::IdForTab, *not* |index|. |index| will be
359// duplicated across tabs in a session, whereas IdForTab uniquely identifies
360// each tab.
361void UmaPolicy::TabClosingAt(TabStripModel* tab_strip_model,
362                             content::WebContents* contents,
363                             int index) {
364  if (!contents)
365    return;
366  std::string url = CleanURL(contents->GetLastCommittedURL());
367  int32 tab_id = SessionID::IdForTab(contents);
368  std::map<int, std::string>::iterator tab_it = tab_list_.find(tab_id);
369  if (tab_it != tab_list_.end())
370    tab_list_.erase(tab_id);
371
372  CleanupClosedPage(url, contents);
373}
374
375void UmaPolicy::SetupOpenedPage(const std::string& url) {
376  url_status_[url][kNumberOfTabs]++;
377}
378
379void UmaPolicy::CleanupClosedPage(const std::string& cleaned_url,
380                                  content::WebContents* web_contents) {
381  SiteMap::iterator old_site_lookup = url_status_.find(cleaned_url);
382  if (old_site_lookup == url_status_.end())
383    return;
384  old_site_lookup->second[kNumberOfTabs]--;
385  if (old_site_lookup->second[kNumberOfTabs] == 0) {
386    HistogramOnClose(cleaned_url, web_contents);
387    url_status_.erase(cleaned_url);
388  }
389}
390
391// Helpers. --------------------------------------------------------------------
392
393// We don't want to treat # ref navigations as if they were new pageloads.
394// So we get rid of the ref if it has it.
395// We convert to a string in the hopes that this is faster than Replacements.
396std::string UmaPolicy::CleanURL(const GURL& gurl) {
397  if (gurl.spec().empty())
398    return GURL(url::kAboutBlankURL).spec();
399  if (!gurl.is_valid())
400    return gurl.spec();
401  if (!gurl.has_ref())
402    return gurl.spec();
403  std::string port = "";
404  if (gurl.has_port())
405    port = ":" + gurl.port();
406  std::string query = "";
407  if (gurl.has_query())
408    query = "?" + gurl.query();
409  return base::StringPrintf("%s://%s%s%s%s",
410                            gurl.scheme().c_str(),
411                            gurl.host().c_str(),
412                            port.c_str(),
413                            gurl.path().c_str(),
414                            query.c_str());
415}
416
417const char* UmaPolicy::GetHistogramName(PageStatus status) {
418  switch (status) {
419    case CONTENT_SCRIPT:
420      return "ContentScript";
421    case READ_DOM:
422      return "ReadDom";
423    case MODIFIED_DOM:
424      return "ModifiedDom";
425    case DOM_METHOD:
426      return "InvokedDomMethod";
427    case DOCUMENT_WRITE:
428      return "DocumentWrite";
429    case INNER_HTML:
430      return "InnerHtml";
431    case CREATED_SCRIPT:
432      return "CreatedScript";
433    case CREATED_IFRAME:
434      return "CreatedIframe";
435    case CREATED_DIV:
436      return "CreatedDiv";
437    case CREATED_LINK:
438      return "CreatedLink";
439    case CREATED_INPUT:
440      return "CreatedInput";
441    case CREATED_EMBED:
442      return "CreatedEmbed";
443    case CREATED_OBJECT:
444      return "CreatedObject";
445    case AD_INJECTED:
446      return "AdInjected";
447    case AD_REMOVED:
448      return "AdRemoved";
449    case AD_REPLACED:
450      return "AdReplaced";
451    case AD_LIKELY_INJECTED:
452      return "AdLikelyInjected";
453    case AD_LIKELY_REPLACED:
454      return "AdLikelyReplaced";
455    case NONE:
456    case MAX_STATUS:
457    default:
458      NOTREACHED();
459      return "";
460  }
461}
462
463}  // namespace extensions
464