get_search_engines.py revision 4346564c1f6faefff5e0d3fdc7f189ec2e948019
#!/usr/bin/python2.4
#
# Copyright (C) 2010 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Creates the list of search engines

The created list is placed in the res/values-<locale> directory. Also updates
res/values/all_search_engines.xml if required with new data.

Usage: get_search_engines.py

Copyright (C) 2010 The Android Open Source Project
"""
27d26706538834e0ed58bf28f08d9a2885c0e7efcbBjorn Bringert
import os
import re
import sys
import urllib
from xml.dom import minidom
33d26706538834e0ed58bf28f08d9a2885c0e7efcbBjorn Bringert
# Locales to generate search engine lists for. Each entry has the form
# <language>-<country>; generateListForLocale() splits on the '-'.
locales = ["cs-CZ", "da-DK", "de-AT", "de-CH", "de-DE", "el-GR", "en-AU",
    "en-GB", "en-IE", "en-NZ", "en-SG", "en-ZA", "es-ES", "fr-BE", "fr-FR",
    "it-IT", "ja-JP", "ko-KR", "nb-NO", "nl-BE", "nl-NL", "pl-PL", "pt-PT",
    "pt-BR", "ru-RU", "sv-SE", "tr-TR", "zh-CN", "zh-HK", "zh-MO", "zh-TW"]
39d26706538834e0ed58bf28f08d9a2885c0e7efcbBjorn Bringert
# Hard-coded engine data for Google, returned by getEngineData("google")
# instead of being parsed from the Chrome data file. The first element is the
# internal engine name; the remaining six are written as <item> entries and
# are already XML-escaped (note the '&amp;').
# NOTE(review): the remaining fields look like display name, keyword, favicon
# URL, search URL, encoding and suggest URL -- confirm against the consumer.
google_data = ["google", "Google", "google.com",
  "http://www.google.com/favicon.ico",
  "http://www.google.com/m?hl={language}&amp;ie={inputEncoding}&amp;source=android-browser&amp;q={searchTerms}",
  "UTF-8",
  "http://www.google.com/complete/search?hl={language}&amp;json=true&amp;q={searchTerms}"]
454346564c1f6faefff5e0d3fdc7f189ec2e948019Bjorn Bringert
class SearchEngineManager(object):
  """Manages list of search engines and creates locale specific lists.

  The main method useful for the caller is generateListForLocale(), which
  creates a locale specific search_engines.xml file suitable for use by the
  Android WebSearchProvider implementation. Every engine referenced by a
  generated list is remembered in self.all_engines so that writeAllEngines()
  can emit the matching definitions afterwards.
  """

  # Number of data fields (not counting the internal name) written for each
  # engine in all_search_engines.xml.
  _ITEMS_PER_ENGINE = 6

  # Non-greedy so that two comments on one line do not swallow the code that
  # sits between them; DOTALL so that multi-line comments are removed too.
  _BLOCK_COMMENT_RE = re.compile(r'/\*.*?\*/', re.DOTALL)
  _LINE_COMMENT_RE = re.compile(r'//[^\n]*')

  def __init__(self):
    """Inits SearchEngineManager with relevant search engine data.

    The search engine data is downloaded from the Chrome source repository.
    Exits the process with status 2 if the download reports that the
    repository could not be found.
    """
    # urllib.urlopen is the Python 2 API that this script targets.
    response = urllib.urlopen(
        'http://src.chromium.org/viewvc/chrome/trunk/src/chrome/'
        'browser/search_engines/template_url_prepopulate_data.cc')
    try:
      self.chrome_data = response.read()
    finally:
      response.close()

    if 'repository not found' in self.chrome_data.lower():
      print('Unable to get Chrome source data for search engine list.\nExiting.')
      sys.exit(2)

    # Resource directory, located relative to the directory of this script.
    self.resdir = os.path.normpath(os.path.join(sys.path[0], '../res'))

    # Names of all engines referenced by the lists generated so far.
    self.all_engines = set()

  def getXmlString(self, str):
    """Returns an XML-safe string for the given string.

    Given a string from the search engine data structure, convert it to a
    string suitable to write to our XML data file by stripping away NULLs,
    unwanted quotes, wide-string declarations (L"") and replacing C-style
    unicode characters with XML equivalents.

    Args:
      str: One raw field of a C++ struct initializer, e.g. 'L"Yahoo!"',
          '"UTF-8"' or 'NULL'.

    Returns:
      The escaped text, or '' for NULL.
    """
    # Work on a local name; the parameter shadows the builtin but is kept for
    # interface compatibility.
    text = str.strip()
    if text.upper() == 'NULL':
      return ''

    if text.startswith('L"'):
      text = text[2:]
    text = text.strip('"')

    # A leading '@' or '?' is backslash-escaped. This is done after the quotes
    # are removed so that narrow string literals ("?foo") are covered as well
    # as wide ones (L"?foo").
    if text.startswith('@') or text.startswith('?'):
      text = '\\' + text

    text = text.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
    text = text.replace('"', '&quot;').replace('\'', '&apos;')
    # C-style \xNNNN escapes become XML numeric character references.
    text = re.sub(r'\\x([a-fA-F0-9]+)', r'&#x\1;', text)

    return text

  def getEngineData(self, name):
    """Returns an array of strings describing the specified search engine.

    The returned strings are in the same order as in the Chrome source data file
    except that the internal name of the search engine is inserted at the
    beginning of the list.

    Fields are separated on ',' only, so a string literal that itself contains
    a comma is not supported.

    Args:
      name: Internal engine name as used in the Chrome data file.

    Returns:
      A new list [name, field1, field2, ...] of XML-safe strings, or None
      (after printing a message) if the engine could not be found or parsed.
    """
    if name == "google":
      # Hand out a copy so callers cannot modify the shared module-level list.
      return list(google_data)

    # Find the first occurrence of this search engine name in the form
    # " <name> =" in the chrome data file.
    match = re.search(r'\s' + re.escape(name) + r'\s*=', self.chrome_data)
    if not match:
      print('Unable to find data for search engine ' + name +
            '. Please check the chrome data file for format changes.')
      return None

    # Extract the struct declaration between the curly braces.
    brace_pos = self.chrome_data.find('{', match.start())
    end_pos = -1
    if brace_pos != -1:
      end_pos = self.chrome_data.find('};', brace_pos + 1)
    if brace_pos == -1 or end_pos == -1:
      print('Unable to parse data for search engine ' + name +
            '. Please check the chrome data file for format changes.')
      return None
    engine_data_str = self.chrome_data[brace_pos + 1:end_pos]

    # Remove c++ style '//' comments at the ends of each line. Only ' // '
    # (with surrounding spaces) is treated as a comment so that the '//' in
    # URLs is left alone.
    stripped_lines = []
    for line in engine_data_str.split('\n'):
      comment_pos = line.find(' // ')
      if comment_pos != -1:
        line = line[:comment_pos]
      stripped_lines.append(line)
    engine_data_str = '\n'.join(stripped_lines) + '\n'

    # Join multiple line strings into a single string.
    engine_data_str = re.sub(r'"\s+"', '', engine_data_str)
    engine_data_str = re.sub(r'"\s+L"', '', engine_data_str)
    engine_data_str = engine_data_str.replace('"L"', '')

    raw_fields = engine_data_str.split(',')
    # A trailing comma leaves a whitespace-only last element; ignore it. The
    # check is made on the raw text so that a real trailing NULL or "" field
    # (which also converts to an empty string) is kept.
    if not raw_fields[-1].strip():
      raw_fields.pop()

    engine_data = [self.getXmlString(field) for field in raw_fields]
    engine_data.insert(0, name)

    return engine_data

  def getSearchEnginesForCountry(self, country):
    """Returns the list of search engine names for the given country.

    The data comes from the Chrome data file.

    Args:
      country: Suffix of the 'engines_XX' array to look up, e.g. 'GB' or
          'default'.

    Returns:
      A list of engine names (possibly empty), or None (after printing a
      message) if the array could not be found or parsed.
    """
    # The Chrome data file has an array defined with the name 'engines_XX'
    # where XX = country.
    pos = self.chrome_data.find('engines_' + country)
    if pos == -1:
      print('Unable to find search engine data for country ' + country + '.')
      return None

    # Extract the text between the curly braces for this array declaration.
    brace_pos = self.chrome_data.find('{', pos)
    engines_end = -1
    if brace_pos != -1:
      engines_end = self.chrome_data.find('}', brace_pos + 1)
    if brace_pos == -1 or engines_end == -1:
      print('Unable to parse search engine data for country ' + country + '.')
      return None
    engines_str = self.chrome_data[brace_pos + 1:engines_end]

    # Remove embedded comments, white spaces and address-of operators.
    engines_str = self._BLOCK_COMMENT_RE.sub('', engines_str)
    engines_str = self._LINE_COMMENT_RE.sub('', engines_str)
    engines_str = re.sub(r'\s+', '', engines_str)
    engines_str = engines_str.replace('&', '')

    # Split the array into its elements; empty entries (trailing comma or an
    # empty array) are dropped.
    return [engine for engine in engines_str.split(',') if engine]

  def writeAllEngines(self):
    """Writes all search engines to the all_search_engines.xml file.

    One <string-array> named after the engine is written per entry of
    self.all_engines, in sorted order so that the generated file is stable
    between runs. Each array holds exactly six <item> entries; missing
    trailing fields are written as empty items. Engines whose data cannot be
    found are skipped.
    """
    all_search_engines_path = os.path.join(self.resdir,
                                           'values/all_search_engines.xml')

    text = []
    for engine_name in sorted(self.all_engines):
      engine_data = self.getEngineData(engine_name)
      if not engine_data:
        # getEngineData() has already reported the problem.
        continue

      fields = engine_data[1:1 + self._ITEMS_PER_ENGINE]
      fields.extend([''] * (self._ITEMS_PER_ENGINE - len(fields)))

      text.append('  <string-array name="%s" translatable="false">\n'
                  % engine_data[0])
      for field in fields:
        text.append('    <item>%s</item>\n' % field)
      text.append('  </string-array>\n')
      print(fields[0] + " added to all_search_engines.xml")

    self.generateXmlFromTemplate(
        os.path.join(sys.path[0], 'all_search_engines.template.xml'),
        all_search_engines_path, text)

  def generateDefaultList(self):
    """Creates res/values/search_engines.xml from the 'engines_default' array."""
    self.writeEngineList(os.path.join(self.resdir, 'values'), "default")

  def generateListForLocale(self, locale):
    """Creates a new locale specific search_engines.xml file.

    The new file contains search engines specific to that country and is
    written to res/values-<language>-r<COUNTRY>. The engines used are recorded
    so that writeAllEngines() can later write their definitions.

    Args:
      locale: Locale of the form <language>-<country>, e.g. "en-GB".
    """
    separator_pos = locale.find('-')
    if separator_pos == -1:
      print('Locale must be of format <language>-<country>. For e.g.'
            ' "es-US" or "en-GB"')
      return

    language = locale[:separator_pos]
    country = locale[separator_pos + 1:].upper()
    dir_path = os.path.join(self.resdir, 'values-' + language + '-r' + country)

    self.writeEngineList(dir_path, country)

  def writeEngineList(self, dir_path, country):
    """Writes the search_engines.xml file for one country into dir_path.

    Args:
      dir_path: Output directory; it is created if it does not exist.
      country: Suffix of the 'engines_XX' array in the Chrome data file.
    """
    if os.path.exists(dir_path) and not os.path.isdir(dir_path):
      print("File exists in output directory path " + dir_path +
            ". Please remove it and try again.")
      return

    engines = self.getSearchEnginesForCountry(country)
    if not engines:
      return

    # Create the locale specific search_engines.xml file, listing the internal
    # name of every engine for this country. Engines whose data cannot be
    # found are left out (getEngineData() reports them) and are not recorded
    # in self.all_engines, so both output files stay consistent.
    text = ['  <string-array name="search_engines" translatable="false">\n']
    for engine in engines:
      engine_data = self.getEngineData(engine)
      if not engine_data:
        continue
      self.all_engines.add(engine)
      text.append('    <item>%s</item>\n' % engine_data[0])
    text.append('  </string-array>\n')

    self.generateXmlFromTemplate(
        os.path.join(sys.path[0], 'search_engines.template.xml'),
        os.path.join(dir_path, 'search_engines.xml'),
        text)

  def generateXmlFromTemplate(self, template_path, out_path, text):
    """Writes out_path as the template with text inserted before its last line.

    Args:
      template_path: Path of the XML template file to read.
      out_path: Path of the file to write; its directory is created if needed.
      text: List of strings that are concatenated and inserted.

    Raises:
      xml.parsers.expat.ExpatError: If the combined contents are not
          well-formed XML. Nothing is written in that case.
    """
    # Load the template file and insert the new contents before the last line.
    template_file = open(template_path)
    try:
      template_text = template_file.read()
    finally:
      template_file.close()
    # Ignoring the final two characters skips the newline that terminates the
    # last line, so this finds the start of that last line.
    pos = template_text.rfind('\n', 0, -2) + 1
    contents = template_text[:pos] + ''.join(text) + template_text[pos:]

    # Make sure what we have created is valid XML :) No need to check for errors
    # as the script will terminate with an exception if the XML was malformed.
    minidom.parseString(contents)

    dir_path = os.path.dirname(out_path)
    # dir_path is '' when out_path has no directory part; there is nothing to
    # create then, and os.makedirs('') would fail.
    if dir_path and not os.path.exists(dir_path):
      os.makedirs(dir_path)
      print('Created directory ' + dir_path)

    out_file = open(out_path, 'w')
    try:
      out_file.write(contents)
    finally:
      out_file.close()
    print('Wrote ' + out_path)
261d26706538834e0ed58bf28f08d9a2885c0e7efcbBjorn Bringert
if __name__ == "__main__":
  # Generate the default list first, then one list per configured locale, and
  # finally the definitions of every engine those lists refer to.
  manager = SearchEngineManager()
  manager.generateDefaultList()
  for locale in locales:
    manager.generateListForLocale(locale)
  manager.writeAllEngines()
268d26706538834e0ed58bf28f08d9a2885c0e7efcbBjorn Bringert
269