1#!/usr/bin/python
2#
3# Copyright (C) 2010 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#      http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16
17# Generates icudtXXl-default.dat from icudtXXl-all.dat and icu-data-default.txt.
18#
19# Usage:
20#    icu_dat_generator.py [-v] [-h]
21#
22# Sample usage:
23#   $ANDROID_BUILD_TOP/external/icu4c/stubdata$ ./icu_dat_generator.py --verbose
24
25import getopt
26import glob
27import os.path
28import re
29import shutil
30import subprocess
31import sys
32
33
def PrintHelpAndExit():
  """Prints the usage message to stdout and exits the script with status 1."""
  usage_lines = (
      "Usage:",
      "  icu_dat_generator.py [-v|--verbose] [-h|--help]",
      "Example:",
      "  $ANDROID_BUILD_TOP/external/icu4c/stubdata$ ./icu_dat_generator.py",
  )
  # Single-argument print(...) behaves the same under Python 2 and Python 3.
  for usage_line in usage_lines:
    print(usage_line)
  sys.exit(1)
40
41
def InvokeIcuTool(tool, working_dir, args):
  """Runs a pre-built ICU tool and aborts the script if the tool fails.

  Args:
    tool: Name of the executable inside ICU_PREBUILT_DIR (e.g. "icupkg").
    working_dir: Directory to run the tool in, or None to run it in the
        current working directory.
    args: Iterable of command-line arguments passed to the tool.

  Exits the script with status 1 (message on stderr) when the tool returns a
  non-zero exit status.
  """
  command_list = [os.path.join(ICU_PREBUILT_DIR, tool)]
  command_list.extend(args)
  command_text = " ".join(command_list)

  if VERBOSE:
    print("[%s] %s" % (working_dir, command_text))

  ret = subprocess.call(command_list, cwd=working_dir)
  if ret != 0:
    # sys.exit() with a string prints it to stderr and exits with status 1,
    # so the exit code seen by callers is unchanged while the message now
    # shows the failing command line and its exit status.
    sys.exit("error: '%s' failed with exit status %d" % (command_text, ret))
53
54
def ExtractAllResourceFilesToTmpDir():
  """Copies icudtXXl-all.dat to icudtXXl.dat and unpacks it into TMP_DAT_PATH.

  Both .dat files live in $ICU4C_DIR/stubdata. The copy is what icupkg is
  asked to extract ("-x" with the "*" pattern, "-d" TMP_DAT_PATH).
  """
  stubdata = os.path.join(ICU4C_DIR, "stubdata")
  full_dat = os.path.join(stubdata, ICU_DATA + "-all.dat")
  working_dat = os.path.join(stubdata, ICU_DATA + ".dat")

  # Work on a copy named icudtXXl.dat rather than on the -all.dat original.
  shutil.copyfile(full_dat, working_dat)

  extract_args = [working_dat, "-x", "*", "-d", TMP_DAT_PATH]
  InvokeIcuTool("icupkg", None, extract_args)
61
62
def MakeDat(input_file, stubdata_dir):
  """Builds a new icudtXXl.dat inside TMP_DAT_PATH from the input file list.

  Args:
    input_file: Path to the list of files to package (such as
        icu-data-default.txt). The script exits with status 1 if it is not
        an existing file.
    stubdata_dir: Directory whose "cnv" sub-directory holds the
        Android-specific converter files.
  """
  # Single-argument print(...) behaves the same under Python 2 and Python 3.
  print("------ Processing '%s'..." % (input_file))
  if not os.path.isfile(input_file):
    print("%s not a file!" % input_file)
    sys.exit(1)

  GenResIndex(input_file)
  CopyAndroidCnvFiles(stubdata_dir)

  # Run "icupkg -tl -s <TMP_DAT_PATH> -a <input_file> new icudtXXl.dat" with
  # TMP_DAT_PATH as the working directory.
  args = ["-tl", "-s", TMP_DAT_PATH, "-a", input_file, "new", ICU_DATA + ".dat"]
  InvokeIcuTool("icupkg", TMP_DAT_PATH, args)
73
74
def WriteIndex(path, locales):
  """Writes a res_index source file listing the given locales.

  Args:
    path: Path of the text file to create (overwritten if it exists).
    locales: Iterable of locale names. They are written in sorted order so
        that the generated file does not depend on set iteration order.
  """
  empty_value = " {\"\"}\n"  # key-value pair for all locale entries

  # "with" guarantees the file is closed even if a write fails part-way.
  with open(path, "w") as f:
    f.write("res_index:table(nofallback) {\n")
    f.write("  InstalledLocales {\n")
    for locale in sorted(locales):
      f.write(locale + empty_value)
    f.write("  }\n")
    f.write("}\n")
87
88
def AddResFile(collection, path):
  """Adds the resource name found in path to collection, if path is a .res.

  The name is the text between the first "/" (or the start of the string
  when there is none) and the first ".res".
  """
  # The input .txt file has two consumers: this script and icupkg. Only .res
  # files matter here, but icupkg also needs the files they depend on, so
  # silently ignoring non-.res entries is expected rather than an error.
  res_pos = path.find(".res")
  if res_pos <= 0:
    return
  name_start = path.find("/") + 1
  collection.add(path[name_start:res_pos])
97
98
def GenResIndex(input_file):
  """Generates res_index.res for the top level and for every sub-directory.

  Reads input_file (such as icu-data-default.txt), collects the locales it
  mentions for the top level and for brkitr, coll, curr, lang, region and
  zone, prints a summary plus warnings about partially included locales,
  writes one res_index.txt per kind under TMP_DAT_PATH and runs genrb on
  each of them.

  Args:
    input_file: Path to the text file listing the files to package.
  """
  res_index = "res_index.txt"

  # Kinds that live in their own sub-directory, in the order in which a line
  # is tested against them.
  sub_kinds = ("brkitr", "coll", "curr", "lang", "region", "zone")

  kind_to_locales = dict((kind, set()) for kind in sub_kinds)
  # Keep a dedicated name for the top-level locale set. Loop variables below
  # must never rebind it, otherwise the top-level res_index.txt would be
  # written from whichever kind happened to be iterated last.
  top_level_locales = set()
  kind_to_locales["locales"] = top_level_locales

  with open(input_file, "r") as f:
    for line in f:
      if "root." in line or "res_index" in line or "_.res" in line:
        continue

      sub_kind = None
      for kind in sub_kinds:
        if kind + "/" in line:
          sub_kind = kind
          break

      if sub_kind is not None:
        AddResFile(kind_to_locales[sub_kind], line)
      elif ".res" in line:
        # Decide whether this is a locale resource or a misc resource. It is
        # treated as a locale when the name before ".res" is at most 3
        # characters long, or when its first underscore appears within the
        # first 3 characters (i.e. the leading code is assumed to be at most
        # 3 characters long).
        end = line.find(".res")
        underscore = line.find("_")
        if end <= 3 or (underscore <= 3 and underscore > 0):
          top_level_locales.add(line[:end])

  # Find every locale we've mentioned, for whatever reason.
  every_locale = set()
  for kind_locales in kind_to_locales.values():
    every_locale.update(kind_locales)

  if VERBOSE:
    for kind in sorted(kind_to_locales):
      print("%s=%s" % (kind, sorted(kind_to_locales[kind])))

  # Print a human-readable list of the languages supported.
  every_language = set()
  for locale in every_locale:
    language = re.sub(r"(_.*)", "", locale)
    if language != "pool" and language != "supplementalData":
      every_language.add(language)
  input_basename = os.path.basename(input_file)
  print("%s includes %s." % (input_basename, ", ".join(sorted(every_language))))

  # Find cases where we've included only part of a locale's data: the source
  # .txt exists for a kind, but the locale is not listed for that kind.
  missing_files = []
  for locale in every_locale:
    for kind, kind_locales in kind_to_locales.items():
      p = os.path.join(ICU4C_DIR, "data", kind, locale + ".txt")
      if locale not in kind_locales and os.path.exists(p):
        missing_files.append(p)

  # Warn about the missing files.
  for missing_file in sorted(missing_files):
    relative_path = "/".join(missing_file.split("/")[-2:])
    print("warning: missing data for supported locale: %s" % relative_path)

  # Write the genrb input files.
  WriteIndex(os.path.join(TMP_DAT_PATH, res_index), top_level_locales)
  for kind in sub_kinds:
    WriteIndex(os.path.join(TMP_DAT_PATH, kind, res_index),
               kind_to_locales[kind])

  # Call genrb to generate new res_index.res.
  InvokeIcuTool("genrb", TMP_DAT_PATH, [res_index])
  for kind in sub_kinds:
    InvokeIcuTool("genrb", os.path.join(TMP_DAT_PATH, kind), [res_index])
189
190
def CopyAndroidCnvFiles(stubdata_dir):
  """Copies the Android-specific .cnv files into TMP_DAT_PATH.

  Args:
    stubdata_dir: Directory whose "cnv" sub-directory contains the files.
  """
  android_specific_cnv = ("gsm-03.38-2000.cnv",
                          "iso-8859_16-2001.cnv",
                          "docomo-shift_jis-2007.cnv",
                          "kddi-jisx-208-2007.cnv",
                          "kddi-shift_jis-2007.cnv",
                          "softbank-jisx-208-2007.cnv",
                          "softbank-shift_jis-2007.cnv")
  cnv_dir = os.path.join(stubdata_dir, "cnv")
  for cnv_file in android_specific_cnv:
    src_path = os.path.join(cnv_dir, cnv_file)
    dst_path = os.path.join(TMP_DAT_PATH, cnv_file)
    shutil.copyfile(src_path, dst_path)
    if VERBOSE:
      # Single-argument print(...) behaves the same under Python 2 and 3.
      print("copy " + src_path + " " + dst_path)
205
206
def main():
  """Parses the command line and generates icudtXXl-default.dat.

  Sets the module-level configuration globals, extracts icudtXXl-all.dat into
  a temporary directory, packages the files listed in icu-data-default.txt
  and copies the result to stubdata/icudtXXl-default.dat. Exits with status 1
  on bad arguments or when a required file or directory is missing.
  """
  global ANDROID_BUILD_TOP  # $ANDROID_BUILD_TOP
  global ICU4C_DIR          # $ANDROID_BUILD_TOP/external/icu4c
  global ICU_PREBUILT_DIR   # Directory containing pre-built ICU tools.
  global ICU_DATA           # e.g. "icudt50l"
  global TMP_DAT_PATH       # Temporary directory to store all resource files and
                            # intermediate dat files.
  global VERBOSE

  VERBOSE = False

  show_help = False
  try:
    opts, args = getopt.getopt(sys.argv[1:], "hv", ["help", "verbose"])
  except getopt.error:
    PrintHelpAndExit()
  for opt, _ in opts:
    if opt in ("-h", "--help"):
      show_help = True
    elif opt in ("-v", "--verbose"):
      VERBOSE = True
  if args:
    # The script takes no positional arguments.
    show_help = True

  if show_help:
    PrintHelpAndExit()

  ANDROID_BUILD_TOP = os.environ.get("ANDROID_BUILD_TOP")
  if not ANDROID_BUILD_TOP:
    print("$ANDROID_BUILD_TOP not set! Run 'env_setup.sh'.")
    sys.exit(1)
  ICU4C_DIR = os.path.join(ANDROID_BUILD_TOP, "external", "icu4c")
  stubdata_dir = os.path.join(ICU4C_DIR, "stubdata")

  # Work out the ICU version from the source .dat filename, so we can find the
  # appropriate pre-built ICU tools.
  source_dats = sorted(glob.glob(os.path.join(stubdata_dir, "icudt*.dat")))
  if not source_dats:
    # Without this check the indexing below would raise IndexError.
    print("No icudt*.dat file found in %s." % stubdata_dir)
    sys.exit(1)
  source_dat = os.path.basename(source_dats[0])
  icu_version = re.sub(r"([^0-9])", "", source_dat)
  if len(icu_version) < 2:
    # The pre-built tool directory name needs the first two version digits.
    print("Cannot determine the ICU version from %s." % source_dat)
    sys.exit(1)
  ICU_PREBUILT_DIR = os.path.join(ANDROID_BUILD_TOP,
      "prebuilts", "misc", "linux-x86_64", "icu-%s%s" % (icu_version[0], icu_version[1]))
  if not os.path.exists(ICU_PREBUILT_DIR):
    # Every later step invokes a tool from this directory, so stop here
    # instead of failing later inside subprocess.
    print("%s does not exist!" % ICU_PREBUILT_DIR)
    sys.exit(1)

  ICU_DATA = "icudt" + icu_version + "l"

  # Check that icudtXXl-all.dat exists (since we build the other .dat files from that).
  full_data_filename = os.path.join(stubdata_dir, ICU_DATA + "-all.dat")
  if not os.path.isfile(full_data_filename):
    print("%s not present." % full_data_filename)
    sys.exit(1)

  # Create a temporary working directory, discarding any stale one.
  TMP_DAT_PATH = os.path.join(ICU4C_DIR, "tmp")
  if os.path.exists(TMP_DAT_PATH):
    shutil.rmtree(TMP_DAT_PATH)
  os.mkdir(TMP_DAT_PATH)

  # Extract resource files from icudtXXl-all.dat to TMP_DAT_PATH.
  ExtractAllResourceFilesToTmpDir()

  input_file = os.path.join(stubdata_dir, "icu-data-default.txt")
  output_file = os.path.join(stubdata_dir, ICU_DATA + "-default.dat")
  MakeDat(input_file, stubdata_dir)
  shutil.copyfile(os.path.join(TMP_DAT_PATH, ICU_DATA + ".dat"), output_file)
  print("Generated ICU data: %s" % output_file)

  # Cleanup temporary working directory and icudtXXl.dat
  shutil.rmtree(TMP_DAT_PATH)
  os.remove(os.path.join(stubdata_dir, ICU_DATA + ".dat"))
276
# Run the generator only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
  main()
279