1// Copyright (c) 2011 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#ifndef COMPONENTS_TRANSLATE_CONTENT_RENDERER_TRANSLATE_HELPER_H_
6#define COMPONENTS_TRANSLATE_CONTENT_RENDERER_TRANSLATE_HELPER_H_
7
8#include <string>
9
10#include "base/gtest_prod_util.h"
11#include "base/memory/scoped_ptr.h"
12#include "base/memory/weak_ptr.h"
13#include "base/strings/string16.h"
14#include "base/time/time.h"
15#include "components/translate/content/renderer/renderer_cld_data_provider.h"
16#include "components/translate/core/common/translate_errors.h"
17#include "content/public/renderer/render_view_observer.h"
18#include "url/gurl.h"
19
// Forward declarations: this header only refers to these blink types through
// pointers (see IsTranslationAllowed and GetMainFrame in TranslateHelper), so
// the full definitions are not needed here.
20namespace blink {
21class WebDocument;
22class WebFrame;
23}
24
// NOTE(review): this forward declaration looks unused in this header. The only
// RendererCldDataProvider reference (TranslateHelper::cld_data_provider_) is
// written unqualified inside namespace translate, so it presumably resolves to
// the class from the included renderer_cld_data_provider.h rather than to
// content::RendererCldDataProvider. Confirm before removing.
25namespace content {
26class RendererCldDataProvider;
27}
28
29namespace translate {
30
31// This class deals with page translation.
32// There is one TranslateHelper per RenderView.
33//
34// This class provides metrics that allow tracking the user experience impact
35// of non-static CldDataProvider implementations. For background on the data
36// providers, please refer to the following documentation:
37// http://www.chromium.org/developers/how-tos/compact-language-detector-cld-data-source-configuration
38//
39// Available metrics (from the LanguageDetectionTiming enum):
40// 1. ON_TIME
41//    Recorded if PageCaptured(...) is invoked after CLD is available. This is
42//    the ideal case, indicating that CLD is available before it is needed.
43// 2. DEFERRED
44//    Recorded if PageCaptured(...) is invoked before CLD is available.
45//    Sub-optimal case indicating that CLD wasn't available when it was needed,
46//    so the request for detection has been deferred until CLD is available or
47//    until the user navigates to a different page.
48// 3. RESUMED
49//    Recorded if CLD becomes available after a language detection request was
50//    deferred, but before the user navigated to a different page. Language
51//    detection is ultimately completed, it just didn't happen on time.
52//
53// Note that there is NOT a metric that records the number of times that
54// language detection had to be aborted because CLD never became available in
55// time. This is because there is no reasonable way to cover all the cases
56// under which this could occur, particularly the destruction of the renderer
57// for which this object was created. However, this value can be synthetically
58// derived, using the logic below.
59//
60// Every page load that triggers language detection will result in the
61// recording of exactly one of the first two events: ON_TIME or DEFERRED. If
62// detection was deferred and CLD then becomes available in time to satisfy the
63// request, the third event (RESUMED) is recorded as well; thus, the number of
64// times when language detection ultimately fails because CLD isn't ever
65// available is implied as the number of times that detection is deferred minus
66// the number of times that a deferred detection is resumed:
67//
68//   count(FAILED) ~= count(DEFERRED) - count(RESUMED)
69//
70// This is not 100% accurate: some renderer processes are so short-lived that
71// language detection wouldn't have been relevant anyway, and so a failure to
72// detect the language in a timely manner might be completely innocuous. The
73// overall problem with language detection is that it isn't possible to know
74// whether it was required or not until after it has been performed!
75//
76// We use histograms for recording these metrics. On Android, the renderer can
77// be killed without the chance to clean up or transmit these histograms, so
78// metrics get dropped. To work around this, RecordLanguageDetectionTiming
79// forces an IPC message to be sent to the browser process immediately.
80class TranslateHelper : public content::RenderViewObserver {
81 public:
  // Creates a helper for |render_view|. NOTE(review): |world_id|,
  // |extension_group| and |extension_scheme| presumably initialize the
  // same-named members declared at the bottom of this class -- confirm in the
  // .cc file.
82  explicit TranslateHelper(content::RenderView* render_view,
83                           int world_id,
84                           int extension_group,
85                           const std::string& extension_scheme);
86  virtual ~TranslateHelper();
87
88  // Informs us that the page's text has been extracted.
89  void PageCaptured(const base::string16& contents);
90
91  // Lets the translation system know that we are preparing to navigate to
92  // the specified URL. If there is anything that can or should be done before
93  // this URL loads, this is the time to prepare for it.
94  void PrepareForUrl(const GURL& url);
95
96 protected:
97  // The following methods are protected for the benefit of unit-tests. Only the
98  // virtual ones can be overridden; OnTranslatePage and OnRevertTranslation are
  // not virtual. NOTE(review): those two are presumably the handlers for the
  // browser's translate / revert requests dispatched by OnMessageReceived --
  // confirm in the .cc file.
99  void OnTranslatePage(int page_seq_no,
100                       const std::string& translate_script,
101                       const std::string& source_lang,
102                       const std::string& target_lang);
103  void OnRevertTranslation(int page_seq_no);
104
105  // Returns true if the translate library is available, meaning the JavaScript
106  // has already been injected in that page.
107  virtual bool IsTranslateLibAvailable();
108
109  // Returns true if the translate library has been initialized successfully.
110  virtual bool IsTranslateLibReady();
111
112  // Returns true if the translation script has finished translating the page.
113  virtual bool HasTranslationFinished();
114
115  // Returns true if the translation script has reported an error performing the
116  // translation.
117  virtual bool HasTranslationFailed();
118
119  // Starts the translation by calling the translate library.  This method
120  // should only be called when the translate script has been injected in the
121  // page.  Returns false if the call failed immediately.
122  virtual bool StartTranslation();
123
124  // Asks the Translate element in the page what the language of the page is.
125  // Can only be called if a translation has happened and was successful.
126  // Returns the language code on success, an empty string on failure.
127  virtual std::string GetOriginalPageLanguage();
128
129  // Adjusts a delay time for a posted task. This is used in tests to do tasks
130  // immediately by returning 0.
131  virtual base::TimeDelta AdjustDelay(int delayInMs);
132
133  // Executes the JavaScript code in |script| in the main frame of RenderView.
134  virtual void ExecuteScript(const std::string& script);
135
136  // Executes the JavaScript code in |script| in the main frame of RenderView,
137  // and returns the boolean returned by the script evaluation if the script was
138  // run successfully. Otherwise, returns |fallback| value.
139  virtual bool ExecuteScriptAndGetBoolResult(const std::string& script,
140                                             bool fallback);
141
142  // Executes the JavaScript code in |script| in the main frame of RenderView,
143  // and returns the string returned by the script evaluation if the script was
144  // run successfully. Otherwise, returns empty string.
145  virtual std::string ExecuteScriptAndGetStringResult(
146      const std::string& script);
147
148  // Executes the JavaScript code in |script| in the main frame of RenderView,
149  // and returns the number returned by the script evaluation if the script was
150  // run successfully. Otherwise, returns 0.0.
151  virtual double ExecuteScriptAndGetDoubleResult(const std::string& script);
152
153 private:
154  FRIEND_TEST_ALL_PREFIXES(TranslateHelperTest, AdoptHtmlLang);
155  FRIEND_TEST_ALL_PREFIXES(TranslateHelperTest,
156                           CLDAgreeWithLanguageCodeHavingCountryCode);
157  FRIEND_TEST_ALL_PREFIXES(TranslateHelperTest,
158                           CLDDisagreeWithWrongLanguageCode);
159  FRIEND_TEST_ALL_PREFIXES(TranslateHelperTest,
160                           InvalidLanguageMetaTagProviding);
161  FRIEND_TEST_ALL_PREFIXES(TranslateHelperTest, LanguageCodeTypoCorrection);
162  FRIEND_TEST_ALL_PREFIXES(TranslateHelperTest, LanguageCodeSynonyms);
163  FRIEND_TEST_ALL_PREFIXES(TranslateHelperTest, ResetInvalidLanguageCode);
164  FRIEND_TEST_ALL_PREFIXES(TranslateHelperTest, SimilarLanguageCode);
165  FRIEND_TEST_ALL_PREFIXES(TranslateHelperTest, WellKnownWrongConfiguration);
166
  // Timing outcome of a language detection request, as passed to
  // RecordLanguageDetectionTiming.
167  enum LanguageDetectionTiming {
168    ON_TIME,   // Language detection was performed as soon as it was requested
169    DEFERRED,  // Language detection couldn't be performed when it was requested
170    RESUMED,   // A deferred language detection attempt was completed later
171    LANGUAGE_DETECTION_TIMING_MAX_VALUE  // The bounding value for this enum
172  };
173
174  // Converts language code to the one used in server supporting list.
175  static void ConvertLanguageCodeSynonym(std::string* code);
176
177  // Returns whether the page associated with |document| is a candidate for
178  // translation.  Some pages can explicitly specify (via a meta-tag) that they
179  // should not be translated.
180  static bool IsTranslationAllowed(blink::WebDocument* document);
181
182  // RenderViewObserver implementation.
183  virtual bool OnMessageReceived(const IPC::Message& message) OVERRIDE;
184
185  // Informs us that the page's text has been extracted. NOTE(review):
  // |page_seq_no| presumably identifies the page the text was captured from
  // (see page_seq_no_) -- confirm in the .cc file.
186  void PageCapturedImpl(int page_seq_no, const base::string16& contents);
187
188  // Cancels any translation that is currently being performed.  This does not
189  // revert existing translations.
190  void CancelPendingTranslation();
191
192  // Checks if the current running page translation is finished or errored and
193  // notifies the browser accordingly.  If the translation has not terminated,
194  // posts a task to check again later.
195  void CheckTranslateStatus(int page_seq_no);
196
197  // Does the actual translation; presumably called by OnTranslatePage (no
198  // method named TranslatePage is declared in this class).  |count| is used to
  // limit the number of retries.
199  void TranslatePageImpl(int page_seq_no, int count);
200
201  // Sends a message to the browser to notify it that the translation failed
202  // with |error|.
203  void NotifyBrowserTranslationFailed(TranslateErrors::Type error);
204
205  // Convenience method to access the main frame.  Can return NULL, typically
206  // if the page is being closed.
207  blink::WebFrame* GetMainFrame();
208
209  // Do not ask for CLD data any more.
210  void CancelCldDataPolling();
211
212  // Invoked when PageCaptured is called prior to obtaining CLD data. This
213  // method stores the page ID into deferred_page_seq_no_ and COPIES the
214  // contents of the page into deferred_contents_, then sets
215  // deferred_page_capture_ to true. When CLD data is eventually received (in
216  // OnCldDataAvailable), any deferred request will be "resurrected" and allowed
217  // to proceed automatically, assuming that the page ID has not changed.
218  void DeferPageCaptured(const int page_id, const base::string16& contents);
219
220  // Start polling for CLD data.
221  // Polling will automatically halt as soon as the renderer obtains a
222  // reference to the data file.
  // NOTE(review): |delay_millis| is presumably the delay before this request
  // and |next_delay_millis| the delay used for the following attempt -- confirm
  // in the .cc file.
223  void SendCldDataRequest(const int delay_millis, const int next_delay_millis);
224
225  // Callback triggered when CLD data becomes available.
226  void OnCldDataAvailable();
227
228  // Record the timing of language detection, immediately sending an IPC-based
229  // histogram delta update to the browser process in case the hosting renderer
230  // process terminates before the metrics would otherwise be transferred.
231  void RecordLanguageDetectionTiming(LanguageDetectionTiming timing);
232
233  // An ever-increasing sequence number of the current page, used to match up
234  // translation requests with responses.
235  int page_seq_no_;
236
237  // The states associated with the current translation.
238  bool translation_pending_;
239  std::string source_lang_;
240  std::string target_lang_;
241
242  // Time when a page language is determined. This is used to know a duration
243  // time from showing infobar to requesting translation.
244  base::TimeTicks language_determined_time_;
245
246  // Provides CLD data for this process.
247  scoped_ptr<RendererCldDataProvider> cld_data_provider_;
248
249  // Whether or not polling for CLD2 data has started.
250  bool cld_data_polling_started_;
251
252  // Whether or not CancelCldDataPolling has been called.
253  bool cld_data_polling_canceled_;
254
255  // Whether or not a PageCaptured event arrived prior to CLD data becoming
256  // available. If true, deferred_contents_ contains the most recent contents.
257  bool deferred_page_capture_;
258
259  // The ID of the page most recently reported to PageCaptured if
260  // deferred_page_capture_ is true.
261  int deferred_page_seq_no_;
262
263  // The world ID to use for script execution.
264  int world_id_;
265
266  // The extension group.
267  int extension_group_;
268
269  // The URL scheme for translate extensions.
270  std::string extension_scheme_;
271
272  // The contents of the page most recently reported to PageCaptured if
273  // deferred_page_capture_ is true.
274  base::string16 deferred_contents_;
275
276  // Method factory used to make calls to TranslatePageImpl.
  // NOTE(review): declared as the last data member, presumably on purpose so
  // that it is destroyed before the other members -- do not reorder without
  // checking the base::WeakPtrFactory guidance.
277  base::WeakPtrFactory<TranslateHelper> weak_method_factory_;
278
279  DISALLOW_COPY_AND_ASSIGN(TranslateHelper);
280};
281
282}  // namespace translate
283
284#endif  // COMPONENTS_TRANSLATE_CONTENT_RENDERER_TRANSLATE_HELPER_H_
285