get_search_engines.py revision d26706538834e0ed58bf28f08d9a2885c0e7efcb
1#!/usr/bin/python2.4
2#
3# Copyright (C) 2010 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#      http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16#
17"""
18Creates the list of search engines
19
20The created list is placed in the res/values-<locale> directory. Also updates
21res/values/all_search_engines.xml if required with new data.
22
23Usage: get_search_engines.py
24
25Copyright (C) 2010 The Android Open Source Project
26"""
27
28import os
29import re
30import sys
31import urllib
32from xml.dom import minidom
33
# Locales, in <language>-<COUNTRY> form, for which a search engine list is
# generated. Entries are grouped by language, one group per line.
locales = [
    "cs-CZ",
    "da-DK",
    "de-AT", "de-CH", "de-DE",
    "el-GR",
    "en-AU", "en-GB", "en-IE", "en-NZ", "en-SG", "en-ZA",
    "es-ES",
    "fr-BE", "fr-FR",
    "it-IT",
    "ja-JP",
    "ko-KR",
    "nb-NO",
    "nl-BE", "nl-NL",
    "pl-PL",
    "pt-PT", "pt-BR",
    "ru-RU",
    "sv-SE",
    "tr-TR",
    "zh-CN", "zh-HK", "zh-MO", "zh-TW",
]
39
class SearchEngineManager(object):
  """Manages list of search engines and creates locale specific lists.

  The main method useful for the caller is generateListForLocale(), which
  creates a locale specific search_engines.xml file suitable for use by the
  Android WebSearchProvider implementation.

  Attributes:
    chrome_data: Text of Chrome's template_url_prepopulate_data.cc.
    resdir: Normalized path of the '../res' directory relative to the script.
    all_engines: Set of internal engine names referenced by any list written
        so far; writeAllEngines() emits the data for exactly these.
  """

  # Number of data fields (not counting the internal name) that are written
  # for each engine into all_search_engines.xml.
  _ENGINE_ITEM_COUNT = 6

  def __init__(self):
    """Inits SearchEngineManager with relevant search engine data.

    The search engine data is downloaded from the Chrome source repository.
    Exits the process with status 2 if the downloaded page reports that the
    repository could not be found.
    """
    self.chrome_data = urllib.urlopen(
        'http://src.chromium.org/viewvc/chrome/trunk/src/chrome/'
        'browser/search_engines/template_url_prepopulate_data.cc').read()
    if self.chrome_data.lower().find('repository not found') != -1:
      print('Unable to get Chrome source data for search engine list.\nExiting.')
      sys.exit(2)

    self.resdir = os.path.normpath(os.path.join(sys.path[0], '../res'))

    self.all_engines = set()

  def getXmlString(self, str):
    """Returns an XML-safe string for the given string.

    Given a string from the search engine data structure, convert it to a
    string suitable to write to our XML data file by stripping away NULLs,
    unwanted quotes, wide-string declarations (L"") and replacing C-style
    unicode characters with XML equivalents.

    Args:
      str: One raw field of a C struct initializer, e.g. 'L"Google"' or 'NULL'.

    Returns:
      The field's text with XML special characters escaped, or '' for NULL.
    """
    text = str.strip()
    if text.upper() == 'NULL':
      return ''

    # Drop the wide-string marker and then the enclosing quotes.
    if text.startswith('L"'):
      text = text[1:]
    text = text.strip('"')

    # A leading '@' or '?' gets a backslash prefix. This has to be tested
    # after the quotes are gone, otherwise narrow literals such as "@foo"
    # (which start with a quote, not with '@') are never escaped.
    if text[:1] in ('@', '?'):
      text = '\\' + text

    # '&' must be replaced first so that the entities added below are not
    # escaped a second time.
    text = text.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
    text = text.replace('"', '&quot;').replace('\'', '&apos;')
    # C-style \xNNNN escapes become XML numeric character references.
    text = re.sub(r'\\x([a-fA-F0-9]+)', r'&#x\1;', text)

    return text

  def getEngineData(self, name):
    """Returns an array of strings describing the specified search engine.

    The returned strings are in the same order as in the Chrome source data file
    except that the internal name of the search engine is inserted at the
    beginning of the list.

    Args:
      name: Internal (C identifier) name of the search engine.

    Returns:
      A list of XML-safe strings, or None (after printing a message) if the
      engine's struct declaration cannot be located in the Chrome data.
    """
    not_found_msg = ('Unable to find data for search engine ' + name +
                     '. Please check the chrome data file for format changes.')

    # Find the first occurrence of this search engine name in the form
    # " <name> =" in the chrome data file. The name is escaped so that it is
    # always matched literally.
    declaration = re.search(r'\s' + re.escape(name) + r'\s*=', self.chrome_data)
    if not declaration:
      print(not_found_msg)
      return None

    # Extract the struct declaration between the curly braces.
    open_brace = self.chrome_data.find('{', declaration.start())
    if open_brace == -1:
      print(not_found_msg)
      return None
    close_brace = self.chrome_data.find('};', open_brace + 1)
    if close_brace == -1:
      print(not_found_msg)
      return None
    body = self.chrome_data[open_brace + 1:close_brace]

    # Remove c++ style '//' comments at the ends of each line. Only ' // '
    # (with surrounding spaces) is treated as a comment start so that the
    # '//' inside URLs is left alone.
    lines = [line.split(' // ', 1)[0] for line in body.split('\n')]
    body = '\n'.join(lines) + '\n'

    # Join multiple line strings into a single string.
    body = re.sub(r'"\s+"', '', body)
    body = re.sub(r'"\s+L"', '', body)
    body = body.replace('"L"', '')

    # NOTE(review): fields are split on every comma, so a comma inside a
    # string literal would split that field in two.
    engine_data = [self.getXmlString(field) for field in body.split(',')]

    # If the last element was an empty string (due to an extra comma at the
    # end), ignore it.
    if not engine_data[-1]:
      engine_data.pop()

    engine_data.insert(0, name)

    return engine_data

  def _getCompleteEngineData(self, name):
    """Returns getEngineData(name) if it has every field written to XML.

    Returns None, after a message has been printed, when the engine cannot be
    found or has fewer than _ENGINE_ITEM_COUNT data fields.
    """
    engine_data = self.getEngineData(name)
    if engine_data is None:
      return None
    if len(engine_data) <= self._ENGINE_ITEM_COUNT:
      print('Incomplete data for search engine ' + name +
            '. Please check the chrome data file for format changes.')
      return None
    return engine_data

  def getSearchEnginesForCountry(self, country):
    """Returns the list of search engine names for the given country.

    The data comes from the Chrome data file.

    Args:
      country: Suffix of the 'engines_XX' array to read, e.g. 'GB'.

    Returns:
      A list of internal engine names (possibly empty), or None (after
      printing a message) if no array for the country can be found.
    """
    not_found_msg = ('Unable to find search engine data for country ' +
                     country + '.')

    # The Chrome data file has an array defined with the name 'engines_XX'
    # where XX = country.
    pos = self.chrome_data.find('engines_' + country)
    if pos == -1:
      print(not_found_msg)
      return None

    # Extract the text between the curly braces for this array declaration
    open_brace = self.chrome_data.find('{', pos)
    if open_brace == -1:
      print(not_found_msg)
      return None
    close_brace = self.chrome_data.find('}', open_brace + 1)
    if close_brace == -1:
      print(not_found_msg)
      return None
    engines_str = self.chrome_data[open_brace + 1:close_brace]

    # Remove embedded /**/ style comments, white spaces, address-of operators
    # and the trailing comma if any. The comment match is non-greedy so that
    # entries sitting between two comments are kept.
    engines_str = re.sub(r'(?s)/\*.*?\*/', '', engines_str)
    engines_str = re.sub(r'\s+', '', engines_str)
    engines_str = engines_str.replace('&', '')
    engines_str = engines_str.rstrip(',')

    # Split the array into its elements, dropping empty ones so that an
    # empty array gives an empty list.
    return [engine for engine in engines_str.split(',') if engine]

  def writeAllEngines(self):
    """Writes all search engines to the all_search_engines.xml file.

    One <string-array> is written per name in self.all_engines, in sorted
    order so that the generated file is stable between runs. Engines whose
    data is missing or incomplete are skipped after a message is printed.
    """
    all_search_engines_path = os.path.join(
        self.resdir, 'values/all_search_engines.xml')

    text = []

    for engine_name in sorted(self.all_engines):
      engine_data = self._getCompleteEngineData(engine_name)
      if engine_data is None:
        continue
      text.append('  <string-array name="%s" translatable="false">\n'
                  % (engine_data[0]))
      for item in engine_data[1:self._ENGINE_ITEM_COUNT + 1]:
        text.append('    <item>%s</item>\n' % (item))
      text.append('  </string-array>\n')
      print(engine_data[1] + " added to all_search_engines.xml")

    self.generateXmlFromTemplate(
        os.path.join(sys.path[0], 'all_search_engines.template.xml'),
        all_search_engines_path, text)

  def generateDefaultList(self):
    """Creates res/values/search_engines.xml from the 'engines_default' array."""
    self.writeEngineList(os.path.join(self.resdir, 'values'), "default")

  def generateListForLocale(self, locale):
    """Creates a new locale specific search_engines.xml file.

    The new file contains search engines specific to that country. The engines
    used are recorded so that a later writeAllEngines() call includes their
    data in all_search_engines.xml.

    Args:
      locale: Locale of the form <language>-<country>, e.g. "en-GB".
    """
    separator_pos = locale.find('-')
    if separator_pos == -1:
      print('Locale must be of format <language>-<country>. For e.g.'
            ' "es-US" or "en-GB"')
      return

    language = locale[:separator_pos]
    country = locale[separator_pos + 1:].upper()
    dir_path = os.path.join(self.resdir, 'values-' + language + '-r' + country)

    self.writeEngineList(dir_path, country)

  def writeEngineList(self, dir_path, country):
    """Writes search_engines.xml for the given country into dir_path.

    Every engine written is also added to self.all_engines. Engines whose
    data is missing or incomplete are left out of the list.

    Args:
      dir_path: Directory that receives search_engines.xml.
      country: Suffix of the 'engines_XX' array in the Chrome data file.
    """
    if os.path.exists(dir_path) and not os.path.isdir(dir_path):
      print("File exists in output directory path " + dir_path +
            ". Please remove it and try again.")
      return

    engines = self.getSearchEnginesForCountry(country)
    if not engines:
      return

    # Only list engines whose data can actually be extracted, so that the
    # list never refers to an engine absent from all_search_engines.xml.
    usable_engines = []
    for engine in engines:
      if self._getCompleteEngineData(engine) is not None:
        usable_engines.append(engine)
    if not usable_engines:
      return
    self.all_engines.update(usable_engines)

    # Create the locale specific search_engines.xml file. It holds a single
    # string-array with one item per search engine for this country.
    text = ['  <string-array name="search_engines" translatable="false">\n']
    for engine in usable_engines:
      text.append('    <item>%s</item>\n' % (engine))
    text.append('  </string-array>\n')

    self.generateXmlFromTemplate(
        os.path.join(sys.path[0], 'search_engines.template.xml'),
        os.path.join(dir_path, 'search_engines.xml'),
        text)

  def generateXmlFromTemplate(self, template_path, out_path, text):
    """Writes out_path as the template with text inserted before its last line.

    Args:
      template_path: Path of the XML template file to read.
      out_path: Path of the file to write; its directory is created if needed.
      text: List of strings that are concatenated and inserted.

    Raises:
      xml.parsers.expat.ExpatError: If the combined contents are not
          well-formed XML. Nothing is written in that case.
    """
    # Load the template file and insert the new contents before the last line.
    template_file = open(template_path)
    try:
      template_text = template_file.read()
    finally:
      template_file.close()
    # Ignoring the final two characters skips the newline that ends the last
    # line, so this finds the start of that last line.
    pos = template_text.rfind('\n', 0, -2) + 1
    contents = template_text[:pos] + ''.join(text) + template_text[pos:]

    # Make sure what we have created is valid XML :) No need to check for errors
    # as the script will terminate with an exception if the XML was malformed.
    minidom.parseString(contents)

    # dir_path is '' when out_path has no directory part; there is nothing to
    # create then.
    dir_path = os.path.dirname(out_path)
    if dir_path and not os.path.exists(dir_path):
      os.makedirs(dir_path)
      print('Created directory ' + dir_path)
    out_file = open(out_path, 'w')
    try:
      out_file.write(contents)
    finally:
      out_file.close()
    print('Wrote ' + out_path)
251
if __name__ == "__main__":
  # Build the default list first, then one list per configured locale, and
  # finally the shared file holding the data of every engine referenced.
  engine_manager = SearchEngineManager()
  engine_manager.generateDefaultList()
  for locale_code in locales:
    engine_manager.generateListForLocale(locale_code)
  engine_manager.writeAllEngines()
257  manager.writeAllEngines()
258
259