1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "chrome/test/perf/generate_profile.h"
6
7#include "base/at_exit.h"
8#include "base/command_line.h"
9#include "base/file_util.h"
10#include "base/files/file_enumerator.h"
11#include "base/files/file_path.h"
12#include "base/i18n/icu_util.h"
13#include "base/logging.h"
14#include "base/message_loop/message_loop.h"
15#include "base/path_service.h"
16#include "base/strings/string_number_conversions.h"
17#include "base/strings/utf_string_conversions.h"
18#include "base/time/time.h"
19#include "chrome/browser/history/history_service.h"
20#include "chrome/browser/history/history_service_factory.h"
21#include "chrome/browser/history/top_sites.h"
22#include "chrome/common/chrome_paths.h"
23#include "chrome/common/thumbnail_score.h"
24#include "chrome/test/base/testing_browser_process.h"
25#include "chrome/test/base/testing_profile.h"
26#include "chrome/tools/profiles/thumbnail-inl.h"
27#include "content/public/browser/browser_thread.h"
28#include "content/public/browser/notification_service.h"
29#include "content/public/test/test_browser_thread.h"
30#include "third_party/skia/include/core/SkBitmap.h"
31#include "ui/base/resource/resource_bundle.h"
32#include "ui/base/ui_base_paths.h"
33#include "ui/gfx/codec/jpeg_codec.h"
34
35using base::Time;
36using content::BrowserThread;
37
38namespace {
39
// Empirical distribution of word lengths, as measured from Darin's profile.
// Entry [n-1] holds P(word of length n); the table has 19 entries, so it
// covers word lengths 1 through 19.
const float kWordLengthProbabilities[] = {
  0.069f, 0.132f, 0.199f, 0.137f, 0.088f,  // lengths 1-5
  0.115f, 0.081f, 0.055f, 0.034f, 0.021f,  // lengths 6-10
  0.019f, 0.018f, 0.007f, 0.007f, 0.005f,  // lengths 11-15
  0.004f, 0.003f, 0.003f, 0.003f           // lengths 16-19
};
45
// Draws one value from rand() and scales it by RAND_MAX, giving a float in
// [0,1] (both ends inclusive). Handy for probabilistic yes/no decisions.
inline float RandomFloat() {
  const float scale = static_cast<float>(RAND_MAX);
  const int draw = rand();
  return draw / scale;
}
51
// Return an integer uniformly in [min,max), drawn with rand().
// If the range is empty (|max| <= |min|) there is nothing to choose from, so
// |min| is returned without consuming a random number; this avoids the
// division by zero that `rand() % 0` would otherwise cause.
inline int RandomInt(int min, int max) {
  const int span = max - min;
  if (span <= 0)
    return min;
  return min + (rand() % span);
}
56
57// Return a string of |count| lowercase random characters.
58string16 RandomChars(int count) {
59  string16 str;
60  for (int i = 0; i < count; ++i)
61    str += L'a' + rand() % 26;
62  return str;
63}
64
65string16 RandomWord() {
66  // TODO(evanm): should we instead use the markov chain based
67  // version of this that I already wrote?
68
69  // Sample a word length from kWordLengthProbabilities.
70  float sample = RandomFloat();
71  size_t i;
72  for (i = 0; i < arraysize(kWordLengthProbabilities); ++i) {
73    sample -= kWordLengthProbabilities[i];
74    if (sample < 0) break;
75  }
76  const int word_length = i + 1;
77  return RandomChars(word_length);
78}
79
80// Return a string of |count| random words.
81string16 RandomWords(int count) {
82  string16 str;
83  for (int i = 0; i < count; ++i) {
84    if (!str.empty())
85      str += L' ';
86    str += RandomWord();
87  }
88  return str;
89}
90
91// Return a random URL-looking string.
92GURL ConstructRandomURL() {
93  return GURL(ASCIIToUTF16("http://") + RandomChars(3) + ASCIIToUTF16(".com/") +
94      RandomChars(RandomInt(5, 20)));
95}
96
97// Return a random page title-looking string.
98string16 ConstructRandomTitle() {
99  return RandomWords(RandomInt(3, 15));
100}
101
102// Insert a batch of |batch_size| URLs, starting at pageid |page_id|.
103void InsertURLBatch(Profile* profile,
104                    int page_id,
105                    int batch_size,
106                    int types) {
107  HistoryService* history_service =
108      HistoryServiceFactory::GetForProfile(profile, Profile::EXPLICIT_ACCESS);
109
110  // Probability of following a link on the current "page"
111  // (vs randomly jumping to a new page).
112  const float kFollowLinkProbability = 0.85f;
113  // Probability of visiting a page we've visited before.
114  const float kRevisitLinkProbability = 0.1f;
115  // Probability of a URL being "good enough" to revisit.
116  const float kRevisitableURLProbability = 0.05f;
117  // Probability of a URL being the end of a redirect chain.
118  const float kRedirectProbability = 0.05f;
119
120  // A list of URLs that we sometimes revisit.
121  std::vector<GURL> revisit_urls;
122
123  // Scoping value for page IDs (required by the history service).
124  void* id_scope = reinterpret_cast<void*>(1);
125
126  scoped_refptr<base::RefCountedMemory> google_bitmap(
127      new base::RefCountedStaticMemory(kGoogleThumbnail,
128                                       sizeof(kGoogleThumbnail)));
129  scoped_refptr<base::RefCountedMemory> weewar_bitmap(
130      new base::RefCountedStaticMemory(kWeewarThumbnail,
131                                       sizeof(kWeewarThumbnail)));
132
133  printf("Inserting %d URLs...\n", batch_size);
134  GURL previous_url;
135  content::PageTransition transition = content::PAGE_TRANSITION_TYPED;
136  const int end_page_id = page_id + batch_size;
137  history::TopSites* top_sites = profile->GetTopSites();
138  for (; page_id < end_page_id; ++page_id) {
139    // Randomly decide whether this new URL simulates following a link or
140    // whether it's a jump to a new URL.
141    if (!previous_url.is_empty() && RandomFloat() < kFollowLinkProbability) {
142      transition = content::PAGE_TRANSITION_LINK;
143    } else {
144      previous_url = GURL();
145      transition = content::PAGE_TRANSITION_TYPED;
146    }
147
148    // Pick a URL, either newly at random or from our list of previously
149    // visited URLs.
150    GURL url;
151    if (!revisit_urls.empty() && RandomFloat() < kRevisitLinkProbability) {
152      // Draw a URL from revisit_urls at random.
153      url = revisit_urls[RandomInt(0, static_cast<int>(revisit_urls.size()))];
154    } else {
155      url = ConstructRandomURL();
156    }
157
158    // Randomly construct a redirect chain.
159    history::RedirectList redirects;
160    if (RandomFloat() < kRedirectProbability) {
161      const int redir_count = RandomInt(1, 4);
162      for (int i = 0; i < redir_count; ++i)
163        redirects.push_back(ConstructRandomURL());
164      redirects.push_back(url);
165    }
166
167    // Add all of this information to the history service.
168    history_service->AddPage(url, base::Time::Now(),
169                             id_scope, page_id,
170                             previous_url, redirects,
171                             transition, history::SOURCE_BROWSED, true);
172    ThumbnailScore score(0.75, false, false);
173    history_service->SetPageTitle(url, ConstructRandomTitle());
174    if (types & TOP_SITES && top_sites) {
175      top_sites->SetPageThumbnailToJPEGBytes(
176          url,
177          (RandomInt(0, 2) == 0) ? google_bitmap.get() : weewar_bitmap.get(),
178          score);
179    }
180
181    previous_url = url;
182
183    if (revisit_urls.empty() || RandomFloat() < kRevisitableURLProbability)
184      revisit_urls.push_back(url);
185  }
186}
187
188}  // namespace
189
190bool GenerateProfile(GenerateProfileTypes types,
191                     int url_count,
192                     const base::FilePath& dst_dir) {
193  if (!base::CreateDirectory(dst_dir)) {
194    PLOG(ERROR) << "Unable to create directory " << dst_dir.value().c_str();
195    return false;
196  }
197
198  // We want this profile to be as deterministic as possible, so seed the
199  // random number generator with the number of urls we're generating.
200  srand(static_cast<unsigned int>(url_count));
201
202  printf("Creating profiles for testing...\n");
203
204  TestingBrowserProcessInitializer initialize_browser_process;
205  base::MessageLoopForUI message_loop;
206  content::TestBrowserThread ui_thread(BrowserThread::UI, &message_loop);
207  content::TestBrowserThread db_thread(BrowserThread::DB, &message_loop);
208  TestingProfile profile;
209  if (!profile.CreateHistoryService(false, false)) {
210      PLOG(ERROR) << "Creating history service failed";
211      return false;
212  }
213  if (types & TOP_SITES) {
214    profile.CreateTopSites();
215    profile.BlockUntilTopSitesLoaded();
216  }
217
218  // The maximum number of URLs to insert into history in one batch.
219  const int kBatchSize = 2000;
220  int page_id = 0;
221  while (page_id < url_count) {
222    const int batch_size = std::min(kBatchSize, url_count - page_id);
223    InsertURLBatch(&profile, page_id, batch_size, types);
224    // Run all pending messages to give TopSites a chance to catch up.
225    message_loop.RunUntilIdle();
226    page_id += batch_size;
227  }
228
229  profile.DestroyTopSites();
230  profile.DestroyHistoryService();
231
232  message_loop.RunUntilIdle();
233
234  base::FileEnumerator file_iterator(profile.GetPath(), false,
235                                     base::FileEnumerator::FILES);
236  base::FilePath path = file_iterator.Next();
237  while (!path.empty()) {
238    base::FilePath dst_file = dst_dir.Append(path.BaseName());
239    base::DeleteFile(dst_file, false);
240    if (!base::CopyFile(path, dst_file)) {
241      PLOG(ERROR) << "Copying file failed";
242      return false;
243    }
244    path = file_iterator.Next();
245  }
246
247  printf("Finished creating profiles for testing.\n");
248
249  // Restore the random seed.
250  srand(static_cast<unsigned int>(Time::Now().ToInternalValue()));
251
252  return true;
253}
254