1#!/usr/bin/python2.4
2#
3# Copyright (C) 2010 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#      http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16#
17"""
18Creates the list of search engines
19
20The created list is placed in the res/values-<locale> directory. Also updates
21res/values/all_search_engines.xml if required with new data.
22
23Usage: get_search_engines.py
24
25Copyright (C) 2010 The Android Open Source Project
26"""
27
28import os
29import re
30import sys
31import urllib
32from xml.dom import minidom
33
34# Locales to generate search engine lists for
35locales = ["cs-CZ", "da-DK", "de-AT", "de-CH", "de-DE", "el-GR", "en-AU",
36    "en-GB", "en-IE", "en-NZ", "en-SG", "en-ZA", "es-ES", "fr-BE", "fr-FR",
37    "it-IT", "ja-JP", "ko-KR", "nb-NO", "nl-BE", "nl-NL", "pl-PL", "pt-PT",
38    "pt-BR", "ru-RU", "sv-SE", "tr-TR", "zh-CN", "zh-HK", "zh-MO", "zh-TW"]
39
40google_data = ["google", "Google", "google.com",
41  "http://www.google.com/favicon.ico",
42  "http://www.google.com/search?hl={language}&amp;ie={inputEncoding}&amp;source=android-browser&amp;q={searchTerms}",
43  "UTF-8",
44  "http://www.google.com/complete/search?hl={language}&amp;client=android&amp;q={searchTerms}"]
45
46class SearchEngineManager(object):
47  """Manages list of search engines and creates locale specific lists.
48
49  The main method useful for the caller is generateListForLocale(), which
50  creates a locale specific donottranslate-search_engines.xml file.
51  """
52
53  def __init__(self):
54    """Inits SearchEngineManager with relevant search engine data.
55
56    The search engine data is downloaded from the Chrome source repository.
57    """
58    self.chrome_data = urllib.urlopen(
59        'http://src.chromium.org/viewvc/chrome/trunk/src/chrome/'
60        'browser/search_engines/template_url_prepopulate_data.cc').read()
61    if self.chrome_data.lower().find('repository not found') != -1:
62      print 'Unable to get Chrome source data for search engine list.\nExiting.'
63      sys.exit(2)
64
65    self.resdir = os.path.normpath(os.path.join(sys.path[0], '../res'))
66
67    self.all_engines = set()
68
69  def getXmlString(self, str):
70    """Returns an XML-safe string for the given string.
71
72    Given a string from the search engine data structure, convert it to a
73    string suitable to write to our XML data file by stripping away NULLs,
74    unwanted quotes, wide-string declarations (L"") and replacing C-style
75    unicode characters with XML equivalents.
76    """
77    str = str.strip()
78    if str.upper() == 'NULL':
79      return ''
80
81    if str.startswith('L"'):
82      str = str[2:]
83    if str.startswith('@') or str.startswith('?'):
84      str = '\\' + str
85
86    str = str.strip('"')
87    str = str.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
88    str = str.replace('"', '&quot;').replace('\'', '&apos;')
89    str = re.sub(r'\\x([a-fA-F0-9]{1,4})', r'&#x\1;', str)
90
91    return str
92
93  def getEngineData(self, name):
94    """Returns an array of strings describing the specified search engine.
95
96    The returned strings are in the same order as in the Chrome source data file
97    except that the internal name of the search engine is inserted at the
98    beginning of the list.
99    """
100
101    if name == "google":
102      return google_data
103
104    # Find the first occurance of this search engine name in the form
105    # " <name> =" in the chrome data file.
106    re_exp = '\s' + name + '\s*='
107    search_obj = re.search(re_exp, self.chrome_data)
108    if not search_obj:
109      print ('Unable to find data for search engine ' + name +
110             '. Please check the chrome data file for format changes.')
111      return None
112
113    # Extract the struct declaration between the curly braces.
114    start_pos = self.chrome_data.find('{', search_obj.start()) + 1;
115    end_pos = self.chrome_data.find('};', start_pos);
116    engine_data_str = self.chrome_data[start_pos:end_pos]
117
118    # Remove c++ style '//' comments at the ends of each line
119    engine_data_lines = engine_data_str.split('\n')
120    engine_data_str = ""
121    for line in engine_data_lines:
122        start_pos = line.find(' // ')
123        if start_pos != -1:
124            line = line[:start_pos]
125        engine_data_str = engine_data_str + line + '\n'
126
127    # Join multiple line strings into a single string.
128    engine_data_str = re.sub('\"\s+\"', '', engine_data_str)
129    engine_data_str = re.sub('\"\s+L\"', '', engine_data_str)
130    engine_data_str = engine_data_str.replace('"L"', '')
131
132    engine_data = engine_data_str.split(',')
133    for i in range(len(engine_data)):
134      engine_data[i] = self.getXmlString(engine_data[i])
135
136    # If the last element was an empty string (due to an extra comma at the
137    # end), ignore it.
138    if not engine_data[len(engine_data) - 1]:
139      engine_data.pop()
140
141    engine_data.insert(0, name)
142
143    return engine_data
144
145  def getSearchEnginesForCountry(self, country):
146    """Returns the list of search engine names for the given country.
147
148    The data comes from the Chrome data file.
149    """
150    # The Chrome data file has an array defined with the name 'engines_XX'
151    # where XX = country.
152    pos = self.chrome_data.find('engines_' + country)
153    if pos == -1:
154      print ('Unable to find search engine data for country ' + country + '.')
155      return
156
157    # Extract the text between the curly braces for this array declaration
158    engines_start = self.chrome_data.find('{', pos) + 1;
159    engines_end = self.chrome_data.find('}', engines_start);
160    engines_str = self.chrome_data[engines_start:engines_end]
161
162    # Remove embedded /**/ style comments, white spaces, address-of operators
163    # and the trailing comma if any.
164    engines_str = re.sub('\/\*.+\*\/', '', engines_str)
165    engines_str = re.sub('\s+', '', engines_str)
166    engines_str = engines_str.replace('&','')
167    engines_str = engines_str.rstrip(',')
168
169    # Split the array into it's elements
170    engines = engines_str.split(',')
171
172    return engines
173
174  def writeAllEngines(self):
175    """Writes all search engines to the all_search_engines.xml file.
176    """
177
178    all_search_engines_path = os.path.join(self.resdir, 'values/all_search_engines.xml')
179
180    text = []
181
182    for engine_name in self.all_engines:
183      engine_data = self.getEngineData(engine_name)
184      text.append('  <string-array name="%s" translatable="false">\n' % (engine_data[0]))
185      for i in range(1, 7):
186        text.append('    <item>%s</item>\n' % (engine_data[i]))
187      text.append('  </string-array>\n')
188      print engine_data[1] + " added to all_search_engines.xml"
189
190    self.generateXmlFromTemplate(os.path.join(sys.path[0], 'all_search_engines.template.xml'),
191        all_search_engines_path, text)
192
193  def generateDefaultList(self):
194    self.writeEngineList(os.path.join(self.resdir, 'values'), "default")
195
196  def generateListForLocale(self, locale):
197    """Creates a new locale specific donottranslate-search_engines.xml file.
198
199    The new file contains search engines specific to that country. If required
200    this function updates all_search_engines.xml file with any new search
201    engine data necessary.
202    """
203    separator_pos = locale.find('-')
204    if separator_pos == -1:
205      print ('Locale must be of format <language>-<country>. For e.g.'
206             ' "es-US" or "en-GB"')
207      return
208
209    language = locale[0:separator_pos]
210    country = locale[separator_pos + 1:].upper()
211    dir_path = os.path.join(self.resdir, 'values-' + language + '-r' + country)
212
213    self.writeEngineList(dir_path, country)
214
215  def writeEngineList(self, dir_path, country):
216    if os.path.exists(dir_path) and not os.path.isdir(dir_path):
217      print "File exists in output directory path " + dir_path + ". Please remove it and try again."
218      return
219
220    engines = self.getSearchEnginesForCountry(country)
221    if not engines:
222      return
223    for engine in engines:
224      self.all_engines.add(engine)
225
226    # Create the locale specific search_engines.xml file. Each
227    # search_engines.xml file has a hardcoded list of 7 items. If there are less
228    # than 7 search engines for this country, the remaining items are marked as
229    # enabled=false.
230    text = []
231    text.append('  <string-array name="search_engines" translatable="false">\n');
232    for engine in engines:
233      engine_data = self.getEngineData(engine)
234      name = engine_data[0]
235      text.append('    <item>%s</item>\n' % (name))
236    text.append('  </string-array>\n');
237
238    self.generateXmlFromTemplate(os.path.join(sys.path[0], 'search_engines.template.xml'),
239        os.path.join(dir_path, 'donottranslate-search_engines.xml'),
240        text)
241
242  def generateXmlFromTemplate(self, template_path, out_path, text):
243    # Load the template file and insert the new contents before the last line.
244    template_text = open(template_path).read()
245    pos = template_text.rfind('\n', 0, -2) + 1
246    contents = template_text[0:pos] + ''.join(text) + template_text[pos:]
247
248    # Make sure what we have created is valid XML :) No need to check for errors
249    # as the script will terminate with an exception if the XML was malformed.
250    engines_dom = minidom.parseString(contents)
251
252    dir_path = os.path.dirname(out_path)
253    if not os.path.exists(dir_path):
254      os.makedirs(dir_path)
255      print 'Created directory ' + dir_path
256    file = open(out_path, 'w')
257    file.write(contents)
258    file.close()
259    print 'Wrote ' + out_path
260
261if __name__ == "__main__":
262  manager = SearchEngineManager()
263  manager.generateDefaultList()
264  for locale in locales:
265    manager.generateListForLocale(locale)
266  manager.writeAllEngines()
267