get_search_engines.py revision 4346564c1f6faefff5e0d3fdc7f189ec2e948019
1#!/usr/bin/python2.4
2#
3# Copyright (C) 2010 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#      http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16#
17"""
18Creates the list of search engines
19
20The created list is placed in the res/values-<locale> directory. Also updates
21res/values/all_search_engines.xml if required with new data.
22
23Usage: get_search_engines.py
24
25Copyright (C) 2010 The Android Open Source Project
26"""
27
28import os
29import re
30import sys
31import urllib
32from xml.dom import minidom
33
# Locales (in <language>-<COUNTRY> form) for which a locale specific
# search_engines.xml file is generated.
locales = [
    "cs-CZ",
    "da-DK",
    "de-AT", "de-CH", "de-DE",
    "el-GR",
    "en-AU", "en-GB", "en-IE", "en-NZ", "en-SG", "en-ZA",
    "es-ES",
    "fr-BE", "fr-FR",
    "it-IT",
    "ja-JP",
    "ko-KR",
    "nb-NO",
    "nl-BE", "nl-NL",
    "pl-PL",
    "pt-PT", "pt-BR",
    "ru-RU",
    "sv-SE",
    "tr-TR",
    "zh-CN", "zh-HK", "zh-MO", "zh-TW",
]

# Hand-maintained record for Google. SearchEngineManager.getEngineData()
# returns this instead of parsing the Chrome data file when asked for
# "google". The strings are already XML-escaped (note the "&amp;").
google_data = [
    "google",                                # internal engine name
    "Google",                                # display label
    "google.com",                            # keyword
    "http://www.google.com/favicon.ico",     # favicon URL
    # search URL
    "http://www.google.com/m?hl={language}&amp;ie={inputEncoding}&amp;source=android-browser&amp;q={searchTerms}",
    "UTF-8",                                 # input encoding
    # suggestion URL
    "http://www.google.com/complete/search?hl={language}&amp;json=true&amp;q={searchTerms}",
]
45
class SearchEngineManager(object):
  """Manages list of search engines and creates locale specific lists.

  The main method useful for the caller is generateListForLocale(), which
  creates a locale specific search_engines.xml file suitable for use by the
  Android WebSearchProvider implementation. Every engine written to such a
  list is remembered in self.all_engines so that writeAllEngines() can emit
  the full per-engine data afterwards.

  Diagnostics are reported by printing to stdout; methods that cannot do
  their job print a message and return (None) instead of raising.
  """

  def __init__(self):
    """Inits SearchEngineManager with relevant search engine data.

    The search engine data is downloaded from the Chrome source repository.
    Exits the process with status 2 if the download returns a
    'repository not found' page.
    """
    response = urllib.urlopen(
        'http://src.chromium.org/viewvc/chrome/trunk/src/chrome/'
        'browser/search_engines/template_url_prepopulate_data.cc')
    try:
      self.chrome_data = response.read()
    finally:
      # Release the connection even if the read fails.
      response.close()
    if self.chrome_data.lower().find('repository not found') != -1:
      print('Unable to get Chrome source data for search engine list.\nExiting.')
      sys.exit(2)

    # Resource directory, resolved relative to the directory of this script.
    self.resdir = os.path.normpath(os.path.join(sys.path[0], '../res'))

    # Names of all engines referenced by any list written so far.
    self.all_engines = set()

  def getXmlString(self, str):
    """Returns an XML-safe string for the given string.

    Given a string from the search engine data structure, convert it to a
    string suitable to write to our XML data file by stripping away NULLs,
    unwanted quotes, wide-string declarations (L"") and replacing C-style
    unicode characters with XML equivalents.

    Args:
      str: one raw field of the C++ initializer, e.g. ' L"Yahoo!"' or 'NULL'.
          (The parameter name shadows the builtin; it is kept for
          compatibility with existing callers.)

    Returns:
      The cleaned, XML-escaped text; '' for a NULL field.
    """
    text = str.strip()
    if text.upper() == 'NULL':
      return ''

    if text.startswith('L"'):
      text = text[2:]
    # Strip the surrounding quotes *before* looking at the first character,
    # otherwise a plain "@foo" literal would still start with '"' and never
    # get escaped.
    text = text.strip('"')
    # A leading '@' or '?' is backslash-escaped so it is kept as literal text.
    if text.startswith('@') or text.startswith('?'):
      text = '\\' + text

    # '&' must be replaced first so the entities added below stay intact.
    text = text.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
    text = text.replace('"', '&quot;').replace('\'', '&apos;')
    # C-style \xNNNN escapes become XML numeric character references.
    text = re.sub(r'\\x([a-fA-F0-9]+)', r'&#x\1;', text)

    return text

  def getEngineData(self, name):
    """Returns an array of strings describing the specified search engine.

    The returned strings are in the same order as in the Chrome source data file
    except that the internal name of the search engine is inserted at the
    beginning of the list.

    Args:
      name: internal engine name as used in the Chrome data file.

    Returns:
      A new list of XML-safe strings, or None (after printing a message) if
      the engine cannot be found or its declaration cannot be parsed.

    Note: fields are separated by splitting on ',', so a comma inside a
    string literal would be treated as a field separator.
    """

    if name == "google":
      # Hand a copy to the caller so the module-level list cannot be mutated.
      return list(google_data)

    # Find the first occurrence of this search engine name in the form
    # " <name> =" in the chrome data file. The name is escaped so it is
    # always matched literally.
    search_obj = re.search(r'\s' + re.escape(name) + r'\s*=', self.chrome_data)
    if not search_obj:
      print('Unable to find data for search engine ' + name +
            '. Please check the chrome data file for format changes.')
      return None

    # Extract the struct declaration between the curly braces.
    brace_pos = self.chrome_data.find('{', search_obj.start())
    end_pos = -1
    if brace_pos != -1:
      end_pos = self.chrome_data.find('};', brace_pos + 1)
    if end_pos == -1:
      # A missing brace would otherwise make the slice below return
      # unrelated text from the data file.
      print('Unable to parse data for search engine ' + name +
            '. Please check the chrome data file for format changes.')
      return None
    engine_data_str = self.chrome_data[brace_pos + 1:end_pos]

    # Remove c++ style '//' comments at the ends of each line. The
    # surrounding spaces keep the '//' inside URLs from matching.
    stripped_lines = []
    for line in engine_data_str.split('\n'):
      comment_pos = line.find(' // ')
      if comment_pos != -1:
        line = line[:comment_pos]
      stripped_lines.append(line)
    engine_data_str = '\n'.join(stripped_lines) + '\n'

    # Join multiple line strings into a single string.
    engine_data_str = re.sub(r'"\s+"', '', engine_data_str)
    engine_data_str = re.sub(r'"\s+L"', '', engine_data_str)
    engine_data_str = engine_data_str.replace('"L"', '')

    engine_data = [self.getXmlString(field)
                   for field in engine_data_str.split(',')]

    # If the last element was an empty string (due to an extra comma at the
    # end), ignore it.
    if not engine_data[-1]:
      engine_data.pop()

    engine_data.insert(0, name)

    return engine_data

  def getSearchEnginesForCountry(self, country):
    """Returns the list of search engine names for the given country.

    The data comes from the Chrome data file.

    Args:
      country: suffix of the 'engines_XX' array to read, e.g. 'GB' or
          'default'.

    Returns:
      A list of engine names (empty if the array has no entries), or None
      (after printing a message) if the array cannot be found.
    """
    # The Chrome data file has an array defined with the name 'engines_XX'
    # where XX = country.
    pos = self.chrome_data.find('engines_' + country)
    if pos == -1:
      print('Unable to find search engine data for country ' + country + '.')
      return None

    # Extract the text between the curly braces for this array declaration
    brace_pos = self.chrome_data.find('{', pos)
    engines_end = -1
    if brace_pos != -1:
      engines_end = self.chrome_data.find('}', brace_pos + 1)
    if engines_end == -1:
      print('Unable to find search engine data for country ' + country + '.')
      return None
    engines_str = self.chrome_data[brace_pos + 1:engines_end]

    # Remove embedded /**/ style comments, white spaces, address-of operators
    # and the trailing comma if any. The comment match is non-greedy so that
    # entries sitting between two comments on one line are preserved.
    engines_str = re.sub(r'/\*.*?\*/', '', engines_str)
    engines_str = re.sub(r'\s+', '', engines_str)
    engines_str = engines_str.replace('&', '')
    engines_str = engines_str.rstrip(',')

    # Split the array into its elements, dropping empty names (an empty
    # array would otherwise yield a single '' entry).
    return [engine for engine in engines_str.split(',') if engine]

  def writeAllEngines(self):
    """Writes all search engines to the all_search_engines.xml file.

    Emits one <string-array> per engine in self.all_engines containing the
    six data fields that follow the internal name (for google_data these are
    label, keyword, favicon URL, search URL, encoding and suggest URL).
    Engines are written in sorted order so the output is reproducible, and
    engines whose data is missing or incomplete are skipped with a message.
    """

    all_search_engines_path = os.path.join(self.resdir, 'values/all_search_engines.xml')

    text = []

    for engine_name in sorted(self.all_engines):
      engine_data = self.getEngineData(engine_name)
      if not engine_data:
        # getEngineData() has already reported the problem.
        continue
      if len(engine_data) < 7:
        print('Incomplete data for search engine ' + engine_name +
              '. Please check the chrome data file for format changes.')
        continue
      text.append('  <string-array name="%s" translatable="false">\n' % (engine_data[0]))
      # Items 1..6; any further fields in the record are not written.
      for item in engine_data[1:7]:
        text.append('    <item>%s</item>\n' % (item))
      text.append('  </string-array>\n')
      print(engine_data[1] + " added to all_search_engines.xml")

    self.generateXmlFromTemplate(os.path.join(sys.path[0], 'all_search_engines.template.xml'),
        all_search_engines_path, text)

  def generateDefaultList(self):
    """Creates res/values/search_engines.xml from the 'engines_default' array."""
    self.writeEngineList(os.path.join(self.resdir, 'values'), "default")

  def generateListForLocale(self, locale):
    """Creates a new locale specific search_engines.xml file.

    The new file contains search engines specific to that country and is
    written to res/values-<language>-r<COUNTRY>. The engines used are
    recorded so that a later writeAllEngines() call includes their data.

    Args:
      locale: string of the form <language>-<country>, e.g. "en-GB".
    """
    separator_pos = locale.find('-')
    # Reject a missing separator as well as an empty language or country.
    if separator_pos <= 0 or separator_pos == len(locale) - 1:
      print('Locale must be of format <language>-<country>. For e.g.'
            ' "es-US" or "en-GB"')
      return

    language = locale[0:separator_pos]
    country = locale[separator_pos + 1:].upper()
    dir_path = os.path.join(self.resdir, 'values-' + language + '-r' + country)

    self.writeEngineList(dir_path, country)

  def writeEngineList(self, dir_path, country):
    """Writes search_engines.xml for one country into dir_path.

    Args:
      dir_path: output directory; created if it does not exist.
      country: suffix of the 'engines_XX' array to read from the Chrome data.

    Does nothing (beyond printing a message) if dir_path is an existing
    non-directory or no engines are found for the country. Engines whose
    data cannot be found are left out of the list.
    """
    if os.path.exists(dir_path) and not os.path.isdir(dir_path):
      print("File exists in output directory path " + dir_path + ". Please remove it and try again.")
      return

    engines = self.getSearchEnginesForCountry(country)
    if not engines:
      return

    # Create the locale specific search_engines.xml file, listing the
    # internal name of every engine for which data is available.
    text = ['  <string-array name="search_engines" translatable="false">\n']
    for engine in engines:
      engine_data = self.getEngineData(engine)
      if not engine_data:
        # getEngineData() has already reported the problem.
        continue
      # Only engines that actually have data are recorded for
      # writeAllEngines().
      self.all_engines.add(engine)
      text.append('    <item>%s</item>\n' % (engine_data[0]))
    text.append('  </string-array>\n')

    self.generateXmlFromTemplate(os.path.join(sys.path[0], 'search_engines.template.xml'),
        os.path.join(dir_path, 'search_engines.xml'),
        text)

  def generateXmlFromTemplate(self, template_path, out_path, text):
    """Writes out_path as the template with text inserted before its last line.

    Args:
      template_path: path of the XML template file.
      out_path: path of the file to write; its directory is created if
          needed.
      text: list of strings to concatenate and insert.

    Raises:
      An XML parsing exception from minidom if the combined document is not
      well-formed; nothing is written in that case.
    """
    # Load the template file and insert the new contents before the last line.
    template_file = open(template_path)
    try:
      template_text = template_file.read()
    finally:
      template_file.close()
    # Start of the last line: the last newline, ignoring the final 2 chars.
    pos = template_text.rfind('\n', 0, -2) + 1
    contents = template_text[0:pos] + ''.join(text) + template_text[pos:]

    # Make sure what we have created is valid XML :) No need to check for errors
    # as the script will terminate with an exception if the XML was malformed.
    minidom.parseString(contents)

    dir_path = os.path.dirname(out_path)
    # dir_path is '' when out_path has no directory part; makedirs('') fails.
    if dir_path and not os.path.exists(dir_path):
      os.makedirs(dir_path)
      print('Created directory ' + dir_path)
    out_file = open(out_path, 'w')
    try:
      out_file.write(contents)
    finally:
      out_file.close()
    print('Wrote ' + out_path)

if __name__ == "__main__":
  # Script entry point: build the default list first, then one list per
  # configured locale, and finally the combined all_search_engines.xml
  # covering every engine referenced by those lists.
  engine_manager = SearchEngineManager()
  engine_manager.generateDefaultList()
  for locale_code in locales:
    engine_manager.generateListForLocale(locale_code)
  engine_manager.writeAllEngines()

268
269