1// Copyright (c) 2013 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "chrome/browser/extensions/activity_log/activity_actions.h"
6
7#include <algorithm>  // for std::find.
8#include <string>
9
10#include "base/command_line.h"
11#include "base/format_macros.h"
12#include "base/json/json_string_value_serializer.h"
13#include "base/logging.h"
14#include "base/macros.h"
15#include "base/memory/singleton.h"
16#include "base/metrics/histogram.h"
17#include "base/strings/string_number_conversions.h"
18#include "base/strings/string_util.h"
19#include "base/strings/stringprintf.h"
20#include "base/values.h"
21#include "chrome/browser/extensions/activity_log/activity_action_constants.h"
22#include "chrome/browser/extensions/activity_log/ad_network_database.h"
23#include "chrome/browser/extensions/activity_log/fullstream_ui_policy.h"
24#include "chrome/browser/ui/browser.h"
25#include "chrome/common/chrome_switches.h"
26#include "components/rappor/rappor_service.h"
27#include "content/public/browser/web_contents.h"
28#include "extensions/common/constants.h"
29#include "extensions/common/dom_action_types.h"
30#include "sql/statement.h"
31#include "url/gurl.h"
32
33namespace constants = activity_log_constants;
34
35namespace extensions {
36
37namespace {
38
// Name of the Rappor metric used when recording the host of a URL that may
// have been injected as an ad.
// The "Extensions.PossibleAdInjection2" metric uses different Rappor
// parameters than the original metric.
// Declared as a char array, like the other constants below, instead of a
// non-const pointer so that the name cannot be re-pointed at runtime.
const char kExtensionAdInjectionRapporMetricName[] =
    "Extensions.PossibleAdInjection2";

// API names of the Blink events that are inspected for ad injection.
const char kBlinkSetAttributeEvent[] = "blinkSetAttribute";
const char kBlinkAddElementEvent[] = "blinkAddElement";

// Element names that are considered when looking for injected ads.
const char kIframe[] = "iframe";
const char kAnchor[] = "a";
const char kScript[] = "script";

// Attribute names that carry the URL of the elements above.
const char kSrc[] = "src";
const char kHref[] = "href";
53
54std::string Serialize(const base::Value* value) {
55  std::string value_as_text;
56  if (!value) {
57    value_as_text = "null";
58  } else {
59    JSONStringValueSerializer serializer(&value_as_text);
60    serializer.SerializeAndOmitBinaryValues(*value);
61  }
62  return value_as_text;
63}
64
65}  // namespace
66
67using api::activity_log_private::ExtensionActivity;
68
69Action::Action(const std::string& extension_id,
70               const base::Time& time,
71               const ActionType action_type,
72               const std::string& api_name,
73               int64 action_id)
74    : extension_id_(extension_id),
75      time_(time),
76      action_type_(action_type),
77      api_name_(api_name),
78      page_incognito_(false),
79      arg_incognito_(false),
80      count_(0),
81      action_id_(action_id) {}
82
83Action::~Action() {}
84
85// TODO(mvrable): As an optimization, we might return this directly if the
86// refcount is one.  However, there are likely to be other stray references in
87// many cases that will prevent this optimization.
88scoped_refptr<Action> Action::Clone() const {
89  scoped_refptr<Action> clone(
90      new Action(
91          extension_id(), time(), action_type(), api_name(), action_id()));
92  if (args())
93    clone->set_args(make_scoped_ptr(args()->DeepCopy()));
94  clone->set_page_url(page_url());
95  clone->set_page_title(page_title());
96  clone->set_page_incognito(page_incognito());
97  clone->set_arg_url(arg_url());
98  clone->set_arg_incognito(arg_incognito());
99  if (other())
100    clone->set_other(make_scoped_ptr(other()->DeepCopy()));
101  return clone;
102}
103
104Action::InjectionType Action::DidInjectAd(
105    rappor::RapporService* rappor_service) const {
106  MaybeUploadUrl(rappor_service);
107
108  // We should always have an AdNetworkDatabase, but, on the offchance we don't,
109  // don't crash in a release build.
110  if (!AdNetworkDatabase::Get()) {
111    NOTREACHED();
112    return NO_AD_INJECTION;
113  }
114
115  AdType ad_type = AD_TYPE_NONE;
116  InjectionType injection_type = NO_AD_INJECTION;
117
118  if (api_name_ == kBlinkSetAttributeEvent) {
119    std::string element_name;
120    std::string attr_name;
121    if (args_.get()) {
122      args_->GetString(0u, &element_name);
123      args_->GetString(1u, &attr_name);
124    }
125    if (attr_name == kSrc) {
126      if (element_name == kIframe)
127        ad_type = AD_TYPE_IFRAME;
128      else if (element_name == kScript)
129        ad_type = AD_TYPE_SCRIPT;
130    } else if (element_name == kAnchor && attr_name == kHref) {
131      ad_type = AD_TYPE_ANCHOR;
132    }
133
134    if (ad_type != AD_TYPE_NONE)
135      injection_type = CheckAttrModification();
136  } else if (api_name_ == kBlinkAddElementEvent) {
137    std::string element_name;
138    if (args_.get())
139      args_->GetString(0u, &element_name);
140    if (element_name == kIframe)
141      ad_type = AD_TYPE_IFRAME;
142    else if (element_name == kAnchor)
143      ad_type = AD_TYPE_ANCHOR;
144    else if (element_name == kScript)
145      ad_type = AD_TYPE_SCRIPT;
146
147    if (ad_type != AD_TYPE_NONE)
148      injection_type = CheckElementAddition();
149  }
150
151  if (injection_type != NO_AD_INJECTION) {
152    UMA_HISTOGRAM_ENUMERATION(
153        "Extensions.AdInjection.AdType", ad_type, Action::NUM_AD_TYPES);
154  }
155
156  return injection_type;
157}
158
159void Action::set_args(scoped_ptr<base::ListValue> args) {
160  args_.reset(args.release());
161}
162
163base::ListValue* Action::mutable_args() {
164  if (!args_.get()) {
165    args_.reset(new base::ListValue());
166  }
167  return args_.get();
168}
169
170void Action::set_page_url(const GURL& page_url) {
171  page_url_ = page_url;
172}
173
174void Action::set_arg_url(const GURL& arg_url) {
175  arg_url_ = arg_url;
176}
177
178void Action::set_other(scoped_ptr<base::DictionaryValue> other) {
179  other_.reset(other.release());
180}
181
182base::DictionaryValue* Action::mutable_other() {
183  if (!other_.get()) {
184    other_.reset(new base::DictionaryValue());
185  }
186  return other_.get();
187}
188
189std::string Action::SerializePageUrl() const {
190  return (page_incognito() ? constants::kIncognitoUrl : "") + page_url().spec();
191}
192
193void Action::ParsePageUrl(const std::string& url) {
194  set_page_incognito(StartsWithASCII(url, constants::kIncognitoUrl, true));
195  if (page_incognito())
196    set_page_url(GURL(url.substr(strlen(constants::kIncognitoUrl))));
197  else
198    set_page_url(GURL(url));
199}
200
201std::string Action::SerializeArgUrl() const {
202  return (arg_incognito() ? constants::kIncognitoUrl : "") + arg_url().spec();
203}
204
205void Action::ParseArgUrl(const std::string& url) {
206  set_arg_incognito(StartsWithASCII(url, constants::kIncognitoUrl, true));
207  if (arg_incognito())
208    set_arg_url(GURL(url.substr(strlen(constants::kIncognitoUrl))));
209  else
210    set_arg_url(GURL(url));
211}
212
213scoped_ptr<ExtensionActivity> Action::ConvertToExtensionActivity() {
214  scoped_ptr<ExtensionActivity> result(new ExtensionActivity);
215
216  // We do this translation instead of using the same enum because the database
217  // values need to be stable; this allows us to change the extension API
218  // without affecting the database.
219  switch (action_type()) {
220    case ACTION_API_CALL:
221      result->activity_type = ExtensionActivity::ACTIVITY_TYPE_API_CALL;
222      break;
223    case ACTION_API_EVENT:
224      result->activity_type = ExtensionActivity::ACTIVITY_TYPE_API_EVENT;
225      break;
226    case ACTION_CONTENT_SCRIPT:
227      result->activity_type = ExtensionActivity::ACTIVITY_TYPE_CONTENT_SCRIPT;
228      break;
229    case ACTION_DOM_ACCESS:
230      result->activity_type = ExtensionActivity::ACTIVITY_TYPE_DOM_ACCESS;
231      break;
232    case ACTION_DOM_EVENT:
233      result->activity_type = ExtensionActivity::ACTIVITY_TYPE_DOM_EVENT;
234      break;
235    case ACTION_WEB_REQUEST:
236      result->activity_type = ExtensionActivity::ACTIVITY_TYPE_WEB_REQUEST;
237      break;
238    case UNUSED_ACTION_API_BLOCKED:
239    case ACTION_ANY:
240    default:
241      // This shouldn't be reached, but some people might have old or otherwise
242      // weird db entries. Treat it like an API call if that happens.
243      result->activity_type = ExtensionActivity::ACTIVITY_TYPE_API_CALL;
244      break;
245  }
246
247  result->extension_id.reset(new std::string(extension_id()));
248  result->time.reset(new double(time().ToJsTime()));
249  result->count.reset(new double(count()));
250  result->api_call.reset(new std::string(api_name()));
251  result->args.reset(new std::string(Serialize(args())));
252  if (action_id() != -1)
253    result->activity_id.reset(
254        new std::string(base::StringPrintf("%" PRId64, action_id())));
255  if (page_url().is_valid()) {
256    if (!page_title().empty())
257      result->page_title.reset(new std::string(page_title()));
258    result->page_url.reset(new std::string(SerializePageUrl()));
259  }
260  if (arg_url().is_valid())
261    result->arg_url.reset(new std::string(SerializeArgUrl()));
262
263  if (other()) {
264    scoped_ptr<ExtensionActivity::Other> other_field(
265        new ExtensionActivity::Other);
266    bool prerender;
267    if (other()->GetBooleanWithoutPathExpansion(constants::kActionPrerender,
268                                                &prerender)) {
269      other_field->prerender.reset(new bool(prerender));
270    }
271    const base::DictionaryValue* web_request;
272    if (other()->GetDictionaryWithoutPathExpansion(constants::kActionWebRequest,
273                                                   &web_request)) {
274      other_field->web_request.reset(new std::string(
275          ActivityLogPolicy::Util::Serialize(web_request)));
276    }
277    std::string extra;
278    if (other()->GetStringWithoutPathExpansion(constants::kActionExtra, &extra))
279      other_field->extra.reset(new std::string(extra));
280    int dom_verb;
281    if (other()->GetIntegerWithoutPathExpansion(constants::kActionDomVerb,
282                                                &dom_verb)) {
283      switch (static_cast<DomActionType::Type>(dom_verb)) {
284        case DomActionType::GETTER:
285          other_field->dom_verb = ExtensionActivity::Other::DOM_VERB_GETTER;
286          break;
287        case DomActionType::SETTER:
288          other_field->dom_verb = ExtensionActivity::Other::DOM_VERB_SETTER;
289          break;
290        case DomActionType::METHOD:
291          other_field->dom_verb = ExtensionActivity::Other::DOM_VERB_METHOD;
292          break;
293        case DomActionType::INSERTED:
294          other_field->dom_verb = ExtensionActivity::Other::DOM_VERB_INSERTED;
295          break;
296        case DomActionType::XHR:
297          other_field->dom_verb = ExtensionActivity::Other::DOM_VERB_XHR;
298          break;
299        case DomActionType::WEBREQUEST:
300          other_field->dom_verb = ExtensionActivity::Other::DOM_VERB_WEBREQUEST;
301          break;
302        case DomActionType::MODIFIED:
303          other_field->dom_verb = ExtensionActivity::Other::DOM_VERB_MODIFIED;
304          break;
305        default:
306          other_field->dom_verb = ExtensionActivity::Other::DOM_VERB_NONE;
307      }
308    } else {
309      other_field->dom_verb = ExtensionActivity::Other::DOM_VERB_NONE;
310    }
311    result->other.reset(other_field.release());
312  }
313
314  return result.Pass();
315}
316
317std::string Action::PrintForDebug() const {
318  std::string result = base::StringPrintf("ACTION ID=%" PRId64, action_id());
319  result += " EXTENSION ID=" + extension_id() + " CATEGORY=";
320  switch (action_type_) {
321    case ACTION_API_CALL:
322      result += "api_call";
323      break;
324    case ACTION_API_EVENT:
325      result += "api_event_callback";
326      break;
327    case ACTION_WEB_REQUEST:
328      result += "webrequest";
329      break;
330    case ACTION_CONTENT_SCRIPT:
331      result += "content_script";
332      break;
333    case UNUSED_ACTION_API_BLOCKED:
334      // This is deprecated.
335      result += "api_blocked";
336      break;
337    case ACTION_DOM_EVENT:
338      result += "dom_event";
339      break;
340    case ACTION_DOM_ACCESS:
341      result += "dom_access";
342      break;
343    default:
344      result += base::StringPrintf("type%d", static_cast<int>(action_type_));
345  }
346
347  result += " API=" + api_name_;
348  if (args_.get()) {
349    result += " ARGS=" + Serialize(args_.get());
350  }
351  if (page_url_.is_valid()) {
352    if (page_incognito_)
353      result += " PAGE_URL=(incognito)" + page_url_.spec();
354    else
355      result += " PAGE_URL=" + page_url_.spec();
356  }
357  if (!page_title_.empty()) {
358    base::StringValue title(page_title_);
359    result += " PAGE_TITLE=" + Serialize(&title);
360  }
361  if (arg_url_.is_valid()) {
362    if (arg_incognito_)
363      result += " ARG_URL=(incognito)" + arg_url_.spec();
364    else
365      result += " ARG_URL=" + arg_url_.spec();
366  }
367  if (other_.get()) {
368    result += " OTHER=" + Serialize(other_.get());
369  }
370
371  result += base::StringPrintf(" COUNT=%d", count_);
372  return result;
373}
374
375bool Action::UrlCouldBeAd(const GURL& url) const {
376  // Ads can only be valid urls that don't match the page's host (linking to the
377  // current page should be considered valid use), and aren't local to the
378  // extension.
379  return url.is_valid() &&
380         !url.is_empty() &&
381         url.host() != page_url_.host() &&
382         !url.SchemeIs(kExtensionScheme);
383}
384
385void Action::MaybeUploadUrl(rappor::RapporService* rappor_service) const {
386  // Don't bother recording if the url is innocuous (or no |rappor_service|).
387  if (!rappor_service)
388    return;
389
390  GURL url;
391
392  if (api_name_ == kBlinkSetAttributeEvent) {
393    std::string element_name;
394    std::string attr_name;
395    std::string url_string;
396    if (args_.get()) {
397      args_->GetString(0u, &element_name);
398      args_->GetString(1u, &attr_name);
399    }
400    if (element_name == kIframe && attr_name == kSrc) {
401      args_->GetString(3u, &url_string);
402      url = GURL(url_string);
403    } else if (element_name == kAnchor && attr_name == kHref) {
404      args_->GetString(3u, &url_string);
405      url = GURL(url_string);
406    }
407  } else if (api_name_ == kBlinkAddElementEvent) {
408    std::string element_name;
409    std::string url_string;
410    if (args_.get())
411      args_->GetString(0u, &element_name);
412    if (element_name == kIframe) {
413      args_->GetString(1u, &url_string);
414      url = GURL(url_string);
415    } else if (element_name == kAnchor) {
416      args_->GetString(1u, &url_string);
417      url = GURL(url_string);
418    }
419  }
420
421  if (!UrlCouldBeAd(url))
422    return;
423
424  // Record the URL - an ad *may* have been injected.
425  rappor_service->RecordSample(kExtensionAdInjectionRapporMetricName,
426                               rappor::ETLD_PLUS_ONE_RAPPOR_TYPE,
427                               url.host());
428}
429
430Action::InjectionType Action::CheckAttrModification() const {
431  if (api_name_ != kBlinkSetAttributeEvent)
432    return NO_AD_INJECTION;
433
434  const AdNetworkDatabase* database = AdNetworkDatabase::Get();
435
436  GURL prev_url;
437  std::string prev_url_string;
438  if (args_.get() && args_->GetString(2u, &prev_url_string))
439    prev_url = GURL(prev_url_string);
440
441  GURL new_url;
442  std::string new_url_string;
443  if (args_.get() && args_->GetString(3u, &new_url_string))
444    new_url = GURL(new_url_string);
445
446  bool new_url_could_be_ad = UrlCouldBeAd(new_url);
447  bool prev_url_valid = prev_url.is_valid() && !prev_url.is_empty();
448
449  bool injected_ad = new_url_could_be_ad && database->IsAdNetwork(new_url);
450  bool replaced_ad = prev_url_valid && database->IsAdNetwork(prev_url);
451
452  if (injected_ad && replaced_ad)
453    return INJECTION_REPLACED_AD;
454  if (injected_ad)
455    return INJECTION_NEW_AD;
456  if (replaced_ad)
457    return INJECTION_REMOVED_AD;
458
459  // If the extension modified the URL with an external, valid URL then there's
460  // a good chance it's ad injection. Log it as a likely one, which also helps
461  // us determine the effectiveness of our IsAdNetwork() recognition.
462  if (new_url_could_be_ad) {
463    if (prev_url_valid)
464      return INJECTION_LIKELY_REPLACED_AD;
465    return INJECTION_LIKELY_NEW_AD;
466  }
467
468  return NO_AD_INJECTION;
469}
470
471Action::InjectionType Action::CheckElementAddition() const {
472  DCHECK_EQ(kBlinkAddElementEvent, api_name_);
473
474  GURL url;
475  std::string url_string;
476  if (args_.get() && args_->GetString(1u, &url_string))
477    url = GURL(url_string);
478
479  if (UrlCouldBeAd(url)) {
480    if (AdNetworkDatabase::Get()->IsAdNetwork(url))
481      return INJECTION_NEW_AD;
482    // If the extension injected an URL which is not local to itself or the
483    // page, there is a good chance it could be a new ad, and our database
484    // missed it.
485    return INJECTION_LIKELY_NEW_AD;
486  }
487  return NO_AD_INJECTION;
488}
489
490bool ActionComparator::operator()(
491    const scoped_refptr<Action>& lhs,
492    const scoped_refptr<Action>& rhs) const {
493  if (lhs->time() != rhs->time())
494    return lhs->time() < rhs->time();
495  else if (lhs->action_id() != rhs->action_id())
496    return lhs->action_id() < rhs->action_id();
497  else
498    return ActionComparatorExcludingTimeAndActionId()(lhs, rhs);
499}
500
501bool ActionComparatorExcludingTimeAndActionId::operator()(
502    const scoped_refptr<Action>& lhs,
503    const scoped_refptr<Action>& rhs) const {
504  if (lhs->extension_id() != rhs->extension_id())
505    return lhs->extension_id() < rhs->extension_id();
506  if (lhs->action_type() != rhs->action_type())
507    return lhs->action_type() < rhs->action_type();
508  if (lhs->api_name() != rhs->api_name())
509    return lhs->api_name() < rhs->api_name();
510
511  // args might be null; treat a null value as less than all non-null values,
512  // including the empty string.
513  if (!lhs->args() && rhs->args())
514    return true;
515  if (lhs->args() && !rhs->args())
516    return false;
517  if (lhs->args() && rhs->args()) {
518    std::string lhs_args = ActivityLogPolicy::Util::Serialize(lhs->args());
519    std::string rhs_args = ActivityLogPolicy::Util::Serialize(rhs->args());
520    if (lhs_args != rhs_args)
521      return lhs_args < rhs_args;
522  }
523
524  // Compare URLs as strings, and treat the incognito flag as a separate field.
525  if (lhs->page_url().spec() != rhs->page_url().spec())
526    return lhs->page_url().spec() < rhs->page_url().spec();
527  if (lhs->page_incognito() != rhs->page_incognito())
528    return lhs->page_incognito() < rhs->page_incognito();
529
530  if (lhs->page_title() != rhs->page_title())
531    return lhs->page_title() < rhs->page_title();
532
533  if (lhs->arg_url().spec() != rhs->arg_url().spec())
534    return lhs->arg_url().spec() < rhs->arg_url().spec();
535  if (lhs->arg_incognito() != rhs->arg_incognito())
536    return lhs->arg_incognito() < rhs->arg_incognito();
537
538  // other is treated much like the args field.
539  if (!lhs->other() && rhs->other())
540    return true;
541  if (lhs->other() && !rhs->other())
542    return false;
543  if (lhs->other() && rhs->other()) {
544    std::string lhs_other = ActivityLogPolicy::Util::Serialize(lhs->other());
545    std::string rhs_other = ActivityLogPolicy::Util::Serialize(rhs->other());
546    if (lhs_other != rhs_other)
547      return lhs_other < rhs_other;
548  }
549
550  // All fields compare as equal if this point is reached.
551  return false;
552}
553
554}  // namespace extensions
555