1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "components/translate/content/renderer/translate_helper.h"
6
7#include "base/bind.h"
8#include "base/compiler_specific.h"
9#include "base/logging.h"
10#include "base/message_loop/message_loop.h"
11#include "base/metrics/histogram.h"
12#include "base/strings/string16.h"
13#include "base/strings/string_util.h"
14#include "base/strings/utf_string_conversions.h"
15#include "components/translate/content/common/translate_messages.h"
16#include "components/translate/core/common/translate_constants.h"
17#include "components/translate/core/common/translate_metrics.h"
18#include "components/translate/core/common/translate_util.h"
19#include "components/translate/core/language_detection/language_detection_util.h"
20#include "content/public/common/content_constants.h"
21#include "content/public/common/url_constants.h"
22#include "content/public/renderer/render_thread.h"
23#include "content/public/renderer/render_view.h"
24#include "ipc/ipc_platform_file.h"
25#include "third_party/WebKit/public/web/WebDocument.h"
26#include "third_party/WebKit/public/web/WebElement.h"
27#include "third_party/WebKit/public/web/WebFrame.h"
28#include "third_party/WebKit/public/web/WebNode.h"
29#include "third_party/WebKit/public/web/WebNodeList.h"
30#include "third_party/WebKit/public/web/WebScriptSource.h"
31#include "third_party/WebKit/public/web/WebView.h"
32#include "third_party/WebKit/public/web/WebWidget.h"
33#include "url/gurl.h"
34#include "v8/include/v8.h"
35
36using base::ASCIIToUTF16;
37using blink::WebDocument;
38using blink::WebElement;
39using blink::WebFrame;
40using blink::WebNode;
41using blink::WebNodeList;
42using blink::WebScriptSource;
43using blink::WebSecurityOrigin;
44using blink::WebString;
45using blink::WebVector;
46using blink::WebView;
47
namespace {

// Base delay, in milliseconds, between checks of whether the translate library
// injected in the page is ready. TranslatePageImpl() multiplies this value by
// the attempt count, so successive checks are spaced further apart.
const int kTranslateInitCheckDelayMs = 150;

// The maximum number of times we'll check to see if the translate library
// injected in the page is ready before reporting an initialization error.
const int kMaxTranslateInitCheckAttempts = 5;

// The delay we wait in milliseconds before checking whether the translation has
// finished.
const int kTranslateStatusCheckDelayMs = 400;

// Language name passed to the Translate element for it to detect the language.
const char kAutoDetectionLanguage[] = "auto";

// Content-security-policy installed on the isolated world before the
// translate script is executed in it.
const char kContentSecurityPolicy[] = "script-src 'self' 'unsafe-eval'";

// Whether or not we have set the CLD callback yet.
// NOTE(review): this flag is shared by every TranslateHelper in this
// translation unit, while the callback is registered on a per-instance data
// provider and bound to a per-instance weak pointer. As written, only the
// first helper that polls registers a callback -- confirm this is intended.
bool g_cld_callback_set = false;

}  // namespace
72
73namespace translate {
74
75////////////////////////////////////////////////////////////////////////////////
76// TranslateHelper, public:
77//
// Creates a helper observing |render_view|. |world_id| and |extension_group|
// are forwarded to every isolated-world script execution, and URLs using
// |extension_scheme| are skipped by PrepareForUrl().
TranslateHelper::TranslateHelper(content::RenderView* render_view,
                                 int world_id,
                                 int extension_group,
                                 const std::string& extension_scheme)
    : content::RenderViewObserver(render_view),
      page_seq_no_(0),
      translation_pending_(false),
      cld_data_provider_(translate::CreateRendererCldDataProviderFor(this)),
      cld_data_polling_started_(false),
      cld_data_polling_canceled_(false),
      deferred_page_capture_(false),
      // -1 means "no deferred page capture"; see PageCapturedImpl().
      deferred_page_seq_no_(-1),
      world_id_(world_id),
      extension_group_(extension_group),
      extension_scheme_(extension_scheme),
      weak_method_factory_(this) {
}
95
// Cancels any translation in flight and stops CLD data polling.
TranslateHelper::~TranslateHelper() {
  // Invalidates the weak pointers handed to delayed tasks and resets the
  // translation state.
  CancelPendingTranslation();
  // CancelPendingTranslation() already cancels polling; the explicit call
  // below keeps that intent visible at destruction time.
  CancelCldDataPolling();
}
100
101void TranslateHelper::PrepareForUrl(const GURL& url) {
102  ++page_seq_no_;
103  Send(new ChromeViewHostMsg_TranslateAssignedSequenceNumber(
104      routing_id(), page_seq_no_));
105  deferred_page_capture_ = false;
106  deferred_page_seq_no_ = -1;
107  deferred_contents_.clear();
108  if (cld_data_polling_started_)
109    return;
110
111  // TODO(andrewhayden): Refactor translate_manager.cc's IsTranslatableURL to
112  // components/translate/core/common/translate_util.cc, and ignore any URL
113  // that fails that check. This will require moving unit tests and rewiring
114  // other function calls as well, so for now replicate the logic here.
115  if (url.is_empty())
116    return;
117  if (url.SchemeIs(content::kChromeUIScheme))
118    return;
119  if (url.SchemeIs(content::kChromeDevToolsScheme))
120    return;
121  if (url.SchemeIs(url::kFtpScheme))
122    return;
123  if (url.SchemeIs(extension_scheme_.c_str()))
124    return;
125
126  // Start polling for CLD data.
127  cld_data_polling_started_ = true;
128  TranslateHelper::SendCldDataRequest(0, 1000);
129}
130
// Runs language detection on |contents| on behalf of the current page, i.e.
// using the current page sequence number.
void TranslateHelper::PageCaptured(const base::string16& contents) {
  PageCapturedImpl(page_seq_no_, contents);
}
134
135void TranslateHelper::PageCapturedImpl(int page_seq_no,
136                                       const base::string16& contents) {
137  // Get the document language as set by WebKit from the http-equiv
138  // meta tag for "content-language".  This may or may not also
139  // have a value derived from the actual Content-Language HTTP
140  // header.  The two actually have different meanings (despite the
141  // original intent of http-equiv to be an equivalent) with the former
142  // being the language of the document and the latter being the
143  // language of the intended audience (a distinction really only
144  // relevant for things like langauge textbooks).  This distinction
145  // shouldn't affect translation.
146  WebFrame* main_frame = GetMainFrame();
147  if (!main_frame || page_seq_no_ != page_seq_no)
148    return;
149
150  if (!cld_data_provider_->IsCldDataAvailable()) {
151    // We're in dynamic mode and CLD data isn't loaded. Retry when CLD data
152    // is loaded, if ever.
153    deferred_page_capture_ = true;
154    deferred_page_seq_no_ = page_seq_no;
155    deferred_contents_ = contents;
156    RecordLanguageDetectionTiming(DEFERRED);
157    return;
158  }
159
160  if (deferred_page_seq_no_ == -1) {
161    // CLD data was available before language detection was requested.
162    RecordLanguageDetectionTiming(ON_TIME);
163  } else {
164    // This is a request that was triggered because CLD data is now available
165    // and was previously deferred.
166    RecordLanguageDetectionTiming(RESUMED);
167  }
168
169  WebDocument document = main_frame->document();
170  std::string content_language = document.contentLanguage().utf8();
171  WebElement html_element = document.documentElement();
172  std::string html_lang;
173  // |html_element| can be null element, e.g. in
174  // BrowserTest.WindowOpenClose.
175  if (!html_element.isNull())
176    html_lang = html_element.getAttribute("lang").utf8();
177  std::string cld_language;
178  bool is_cld_reliable;
179  std::string language = DeterminePageLanguage(
180      content_language, html_lang, contents, &cld_language, &is_cld_reliable);
181
182  if (language.empty())
183    return;
184
185  language_determined_time_ = base::TimeTicks::Now();
186
187  GURL url(document.url());
188  LanguageDetectionDetails details;
189  details.time = base::Time::Now();
190  details.url = url;
191  details.content_language = content_language;
192  details.cld_language = cld_language;
193  details.is_cld_reliable = is_cld_reliable;
194  details.html_root_language = html_lang;
195  details.adopted_language = language;
196
197  // TODO(hajimehoshi): If this affects performance, it should be set only if
198  // translate-internals tab exists.
199  details.contents = contents;
200
201  Send(new ChromeViewHostMsg_TranslateLanguageDetermined(
202      routing_id(),
203      details,
204      IsTranslationAllowed(&document) && !language.empty()));
205}
206
207void TranslateHelper::CancelPendingTranslation() {
208  weak_method_factory_.InvalidateWeakPtrs();
209  translation_pending_ = false;
210  source_lang_.clear();
211  target_lang_.clear();
212  CancelCldDataPolling();
213}
214
215////////////////////////////////////////////////////////////////////////////////
216// TranslateHelper, protected:
217//
218bool TranslateHelper::IsTranslateLibAvailable() {
219  return ExecuteScriptAndGetBoolResult(
220      "typeof cr != 'undefined' && typeof cr.googleTranslate != 'undefined' && "
221      "typeof cr.googleTranslate.translate == 'function'", false);
222}
223
224bool TranslateHelper::IsTranslateLibReady() {
225  return ExecuteScriptAndGetBoolResult("cr.googleTranslate.libReady", false);
226}
227
228bool TranslateHelper::HasTranslationFinished() {
229  return ExecuteScriptAndGetBoolResult("cr.googleTranslate.finished", true);
230}
231
232bool TranslateHelper::HasTranslationFailed() {
233  return ExecuteScriptAndGetBoolResult("cr.googleTranslate.error", true);
234}
235
236bool TranslateHelper::StartTranslation() {
237  std::string script = "cr.googleTranslate.translate('" +
238                       source_lang_ +
239                       "','" +
240                       target_lang_ +
241                       "')";
242  return ExecuteScriptAndGetBoolResult(script, false);
243}
244
245std::string TranslateHelper::GetOriginalPageLanguage() {
246  return ExecuteScriptAndGetStringResult("cr.googleTranslate.sourceLang");
247}
248
249base::TimeDelta TranslateHelper::AdjustDelay(int delayInMs) {
250  // Just converts |delayInMs| without any modification in practical cases.
251  // Tests will override this function to return modified value.
252  return base::TimeDelta::FromMilliseconds(delayInMs);
253}
254
255void TranslateHelper::ExecuteScript(const std::string& script) {
256  WebFrame* main_frame = GetMainFrame();
257  if (!main_frame)
258    return;
259
260  WebScriptSource source = WebScriptSource(ASCIIToUTF16(script));
261  main_frame->executeScriptInIsolatedWorld(
262      world_id_, &source, 1, extension_group_);
263}
264
265bool TranslateHelper::ExecuteScriptAndGetBoolResult(const std::string& script,
266                                                    bool fallback) {
267  WebFrame* main_frame = GetMainFrame();
268  if (!main_frame)
269    return fallback;
270
271  v8::HandleScope handle_scope(v8::Isolate::GetCurrent());
272  WebVector<v8::Local<v8::Value> > results;
273  WebScriptSource source = WebScriptSource(ASCIIToUTF16(script));
274  main_frame->executeScriptInIsolatedWorld(
275      world_id_, &source, 1, extension_group_, &results);
276  if (results.size() != 1 || results[0].IsEmpty() || !results[0]->IsBoolean()) {
277    NOTREACHED();
278    return fallback;
279  }
280
281  return results[0]->BooleanValue();
282}
283
284std::string TranslateHelper::ExecuteScriptAndGetStringResult(
285    const std::string& script) {
286  WebFrame* main_frame = GetMainFrame();
287  if (!main_frame)
288    return std::string();
289
290  v8::HandleScope handle_scope(v8::Isolate::GetCurrent());
291  WebVector<v8::Local<v8::Value> > results;
292  WebScriptSource source = WebScriptSource(ASCIIToUTF16(script));
293  main_frame->executeScriptInIsolatedWorld(
294      world_id_, &source, 1, extension_group_, &results);
295  if (results.size() != 1 || results[0].IsEmpty() || !results[0]->IsString()) {
296    NOTREACHED();
297    return std::string();
298  }
299
300  v8::Local<v8::String> v8_str = results[0]->ToString();
301  int length = v8_str->Utf8Length() + 1;
302  scoped_ptr<char[]> str(new char[length]);
303  v8_str->WriteUtf8(str.get(), length);
304  return std::string(str.get());
305}
306
307double TranslateHelper::ExecuteScriptAndGetDoubleResult(
308    const std::string& script) {
309  WebFrame* main_frame = GetMainFrame();
310  if (!main_frame)
311    return 0.0;
312
313  v8::HandleScope handle_scope(v8::Isolate::GetCurrent());
314  WebVector<v8::Local<v8::Value> > results;
315  WebScriptSource source = WebScriptSource(ASCIIToUTF16(script));
316  main_frame->executeScriptInIsolatedWorld(
317      world_id_, &source, 1, extension_group_, &results);
318  if (results.size() != 1 || results[0].IsEmpty() || !results[0]->IsNumber()) {
319    NOTREACHED();
320    return 0.0;
321  }
322
323  return results[0]->NumberValue();
324}
325
326////////////////////////////////////////////////////////////////////////////////
327// TranslateHelper, private:
328//
329
330// static
331bool TranslateHelper::IsTranslationAllowed(WebDocument* document) {
332  WebElement head = document->head();
333  if (head.isNull() || !head.hasChildNodes())
334    return true;
335
336  const WebString meta(ASCIIToUTF16("meta"));
337  const WebString name(ASCIIToUTF16("name"));
338  const WebString google(ASCIIToUTF16("google"));
339  const WebString value(ASCIIToUTF16("value"));
340  const WebString content(ASCIIToUTF16("content"));
341
342  WebNodeList children = head.childNodes();
343  for (size_t i = 0; i < children.length(); ++i) {
344    WebNode node = children.item(i);
345    if (!node.isElementNode())
346      continue;
347    WebElement element = node.to<WebElement>();
348    // Check if a tag is <meta>.
349    if (!element.hasHTMLTagName(meta))
350      continue;
351    // Check if the tag contains name="google".
352    WebString attribute = element.getAttribute(name);
353    if (attribute.isNull() || attribute != google)
354      continue;
355    // Check if the tag contains value="notranslate", or content="notranslate".
356    attribute = element.getAttribute(value);
357    if (attribute.isNull())
358      attribute = element.getAttribute(content);
359    if (attribute.isNull())
360      continue;
361    if (LowerCaseEqualsASCII(attribute, "notranslate"))
362      return false;
363  }
364  return true;
365}
366
367bool TranslateHelper::OnMessageReceived(const IPC::Message& message) {
368  bool handled = true;
369  IPC_BEGIN_MESSAGE_MAP(TranslateHelper, message)
370    IPC_MESSAGE_HANDLER(ChromeViewMsg_TranslatePage, OnTranslatePage)
371    IPC_MESSAGE_HANDLER(ChromeViewMsg_RevertTranslation, OnRevertTranslation)
372    IPC_MESSAGE_UNHANDLED(handled = false)
373  IPC_END_MESSAGE_MAP()
374  if (!handled) {
375    handled = cld_data_provider_->OnMessageReceived(message);
376  }
377  return handled;
378}
379
380void TranslateHelper::OnTranslatePage(int page_seq_no,
381                                      const std::string& translate_script,
382                                      const std::string& source_lang,
383                                      const std::string& target_lang) {
384  WebFrame* main_frame = GetMainFrame();
385  if (!main_frame || page_seq_no_ != page_seq_no)
386    return;  // We navigated away, nothing to do.
387
388  // A similar translation is already under way, nothing to do.
389  if (translation_pending_ && target_lang_ == target_lang)
390    return;
391
392  // Any pending translation is now irrelevant.
393  CancelPendingTranslation();
394
395  // Set our states.
396  translation_pending_ = true;
397
398  // If the source language is undetermined, we'll let the translate element
399  // detect it.
400  source_lang_ = (source_lang != kUnknownLanguageCode) ? source_lang
401                                                       : kAutoDetectionLanguage;
402  target_lang_ = target_lang;
403
404  ReportUserActionDuration(language_determined_time_, base::TimeTicks::Now());
405
406  GURL url(main_frame->document().url());
407  ReportPageScheme(url.scheme());
408
409  // Set up v8 isolated world with proper content-security-policy and
410  // security-origin.
411  WebFrame* frame = GetMainFrame();
412  if (frame) {
413    frame->setIsolatedWorldContentSecurityPolicy(
414        world_id_, WebString::fromUTF8(kContentSecurityPolicy));
415
416    GURL security_origin = GetTranslateSecurityOrigin();
417    frame->setIsolatedWorldSecurityOrigin(
418        world_id_, WebSecurityOrigin::create(security_origin));
419  }
420
421  if (!IsTranslateLibAvailable()) {
422    // Evaluate the script to add the translation related method to the global
423    // context of the page.
424    ExecuteScript(translate_script);
425    DCHECK(IsTranslateLibAvailable());
426  }
427
428  TranslatePageImpl(page_seq_no, 0);
429}
430
431void TranslateHelper::OnRevertTranslation(int page_seq_no) {
432  if (page_seq_no_ != page_seq_no)
433    return;  // We navigated away, nothing to do.
434
435  if (!IsTranslateLibAvailable()) {
436    NOTREACHED();
437    return;
438  }
439
440  CancelPendingTranslation();
441
442  ExecuteScript("cr.googleTranslate.revert()");
443}
444
445void TranslateHelper::CheckTranslateStatus(int page_seq_no) {
446  // If this is not the same page, the translation has been canceled.  If the
447  // view is gone, the page is closing.
448  if (page_seq_no_ != page_seq_no || !render_view()->GetWebView())
449    return;
450
451  // First check if there was an error.
452  if (HasTranslationFailed()) {
453    // TODO(toyoshim): Check |errorCode| of translate.js and notify it here.
454    NotifyBrowserTranslationFailed(TranslateErrors::TRANSLATION_ERROR);
455    return;  // There was an error.
456  }
457
458  if (HasTranslationFinished()) {
459    std::string actual_source_lang;
460    // Translation was successfull, if it was auto, retrieve the source
461    // language the Translate Element detected.
462    if (source_lang_ == kAutoDetectionLanguage) {
463      actual_source_lang = GetOriginalPageLanguage();
464      if (actual_source_lang.empty()) {
465        NotifyBrowserTranslationFailed(TranslateErrors::UNKNOWN_LANGUAGE);
466        return;
467      } else if (actual_source_lang == target_lang_) {
468        NotifyBrowserTranslationFailed(TranslateErrors::IDENTICAL_LANGUAGES);
469        return;
470      }
471    } else {
472      actual_source_lang = source_lang_;
473    }
474
475    if (!translation_pending_) {
476      NOTREACHED();
477      return;
478    }
479
480    translation_pending_ = false;
481
482    // Check JavaScript performance counters for UMA reports.
483    ReportTimeToTranslate(
484        ExecuteScriptAndGetDoubleResult("cr.googleTranslate.translationTime"));
485
486    // Notify the browser we are done.
487    render_view()->Send(
488        new ChromeViewHostMsg_PageTranslated(render_view()->GetRoutingID(),
489                                             actual_source_lang,
490                                             target_lang_,
491                                             TranslateErrors::NONE));
492    return;
493  }
494
495  // The translation is still pending, check again later.
496  base::MessageLoop::current()->PostDelayedTask(
497      FROM_HERE,
498      base::Bind(&TranslateHelper::CheckTranslateStatus,
499                 weak_method_factory_.GetWeakPtr(), page_seq_no),
500      AdjustDelay(kTranslateStatusCheckDelayMs));
501}
502
503void TranslateHelper::TranslatePageImpl(int page_seq_no, int count) {
504  DCHECK_LT(count, kMaxTranslateInitCheckAttempts);
505  if (page_seq_no_ != page_seq_no || !render_view()->GetWebView())
506    return;
507
508  if (!IsTranslateLibReady()) {
509    // The library is not ready, try again later, unless we have tried several
510    // times unsucessfully already.
511    if (++count >= kMaxTranslateInitCheckAttempts) {
512      NotifyBrowserTranslationFailed(TranslateErrors::INITIALIZATION_ERROR);
513      return;
514    }
515    base::MessageLoop::current()->PostDelayedTask(
516        FROM_HERE,
517        base::Bind(&TranslateHelper::TranslatePageImpl,
518                   weak_method_factory_.GetWeakPtr(),
519                   page_seq_no, count),
520        AdjustDelay(count * kTranslateInitCheckDelayMs));
521    return;
522  }
523
524  // The library is loaded, and ready for translation now.
525  // Check JavaScript performance counters for UMA reports.
526  ReportTimeToBeReady(
527      ExecuteScriptAndGetDoubleResult("cr.googleTranslate.readyTime"));
528  ReportTimeToLoad(
529      ExecuteScriptAndGetDoubleResult("cr.googleTranslate.loadTime"));
530
531  if (!StartTranslation()) {
532    NotifyBrowserTranslationFailed(TranslateErrors::TRANSLATION_ERROR);
533    return;
534  }
535  // Check the status of the translation.
536  base::MessageLoop::current()->PostDelayedTask(
537      FROM_HERE,
538      base::Bind(&TranslateHelper::CheckTranslateStatus,
539                 weak_method_factory_.GetWeakPtr(), page_seq_no),
540      AdjustDelay(kTranslateStatusCheckDelayMs));
541}
542
543void TranslateHelper::NotifyBrowserTranslationFailed(
544    TranslateErrors::Type error) {
545  translation_pending_ = false;
546  // Notify the browser there was an error.
547  render_view()->Send(new ChromeViewHostMsg_PageTranslated(
548      render_view()->GetRoutingID(), source_lang_, target_lang_, error));
549}
550
551WebFrame* TranslateHelper::GetMainFrame() {
552  WebView* web_view = render_view()->GetWebView();
553
554  // When the tab is going to be closed, the web_view can be NULL.
555  if (!web_view)
556    return NULL;
557
558  return web_view->mainFrame();
559}
560
// Flags CLD data polling as canceled. SendCldDataRequest() checks this flag
// and returns without requesting data or re-posting itself once it is set.
void TranslateHelper::CancelCldDataPolling() {
  cld_data_polling_canceled_ = true;
}
564
565void TranslateHelper::SendCldDataRequest(const int delay_millis,
566                                         const int next_delay_millis) {
567  // Terminate immediately if told to stop polling.
568  if (cld_data_polling_canceled_)
569    return;
570
571  // Terminate immediately if data is already loaded.
572  if (cld_data_provider_->IsCldDataAvailable())
573    return;
574
575  if (!g_cld_callback_set) {
576    g_cld_callback_set = true;
577    cld_data_provider_->SetCldAvailableCallback(
578        base::Bind(&TranslateHelper::OnCldDataAvailable,
579                   weak_method_factory_.GetWeakPtr()));
580  }
581
582  // Else, make an asynchronous request to get the data we need.
583  cld_data_provider_->SendCldDataRequest();
584
585  // ... and enqueue another delayed task to call again. This will start a
586  // chain of polling that will last until the pointer stops being NULL,
587  // which is the right thing to do.
588  // NB: In the great majority of cases, the data file will be available and
589  // the very first delayed task will be a no-op that terminates the chain.
590  // It's only while downloading the file that this will chain for a
591  // nontrivial amount of time.
592  // Use a weak pointer to avoid keeping this helper object around forever.
593  base::MessageLoop::current()->PostDelayedTask(
594      FROM_HERE,
595      base::Bind(&TranslateHelper::SendCldDataRequest,
596                 weak_method_factory_.GetWeakPtr(),
597                 next_delay_millis,
598                 next_delay_millis),
599      base::TimeDelta::FromMilliseconds(delay_millis));
600}
601
602void TranslateHelper::OnCldDataAvailable() {
603  if (deferred_page_capture_) {
604    deferred_page_capture_ = false; // Don't do this a second time.
605    PageCapturedImpl(deferred_page_seq_no_, deferred_contents_);
606    deferred_page_seq_no_ = -1; // Clean up for sanity
607    deferred_contents_.clear(); // Clean up for sanity
608  }
609}
610
// Records |timing| in the "Translate.LanguageDetectionTiming" histogram and
// asks the render thread to update histograms right away.
void TranslateHelper::RecordLanguageDetectionTiming(
    LanguageDetectionTiming timing) {
  DVLOG(1) << "Language detection timing: " << timing;
  UMA_HISTOGRAM_ENUMERATION("Translate.LanguageDetectionTiming", timing,
                            LANGUAGE_DETECTION_TIMING_MAX_VALUE);

  // The following comment is copied from page_load_histograms.cc, and applies
  // just as equally here:
  //
  // Since there are currently no guarantees that renderer histograms will be
  // sent to the browser, we initiate a PostTask here to be sure that we send
  // the histograms we generated.  Without this call, pages that don't have an
  // on-close-handler might generate data that is lost when the renderer is
  // shutdown abruptly (perchance because the user closed the tab).
  //
  // Note on performance: Under normal circumstances, this should get called
  // once per page load. The code will either manage to do it ON_TIME or will
  // be DEFERRED until CLD is ready. In the latter case, CLD is in dynamic mode
  // and may eventually become available, triggering the RESUMED event; after
  // this, everything should start being ON_TIME. This should never run more
  // than twice in a page load, under any conditions.
  // Also note that language detection is triggered off of a delay AFTER the
  // page load completed event has fired, making this very much off the critical
  // path.
  content::RenderThread::Get()->UpdateHistograms(
      content::kHistogramSynchronizerReservedSequenceNumber);
}
637
638}  // namespace translate
639