#!/bin/bash
# Copyright (c) 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.


# Remove display names for languages that are not listed in the accept-language
# list of Chromium.
#
# Globals:
#   langdatapath (read): directory holding one <lang>.txt file per language.
# Files read from the current directory:
#   accept_lang.list: languages whose display names are kept.
#   chrome_ui_languages.list: languages whose <lang>.txt file is rewritten.
#   In both lists, lines starting with '#' are ignored.
# Side effects:
#   Edits ${langdatapath}/<lang>.txt in place for every UI language.
function filter_display_language_names {
  local lang target
  # Local and initially empty, so a value inherited from the environment or
  # left behind by an earlier call cannot leak into the regular expression.
  local accept_lang_pattern=""

  # Word splitting of the grep output is intentional: every
  # whitespace-separated token is treated as one language code.
  for lang in $(grep -v '^#' accept_lang.list)
  do
    # Insert '|' between alternatives, but not in front of the first one.
    accept_lang_pattern="${accept_lang_pattern:+${accept_lang_pattern}|}${lang}"
  done
  # The trailing [^a-z] requires the code to be followed by something other
  # than a lowercase letter, so 'fr' does not match a key such as 'fra'.
  accept_lang_pattern="(${accept_lang_pattern})[^a-z]"

  echo "Filtering out display names for non-A-L languages ${langdatapath}"
  for lang in $(grep -v '^#' chrome_ui_languages.list)
  do
    target="${langdatapath}/${lang}.txt"
    echo "Overwriting ${target} ..."
    # The Keys, Types and Variants tables are deleted entirely. Inside the
    # Languages table only the opening line, the entries matching the
    # accept-language pattern and the closing line are printed; the final 'd'
    # discards every line of the table without the automatic print.
    # The pattern is interpolated inside double quotes so that its brackets
    # are never subject to pathname expansion.
    sed -r -i \
    '/^    Keys\{$/,/^    \}$/d
     /^    Languages\{$/, /^    \}$/ {
       /^    Languages\{$/p
       /^        '"${accept_lang_pattern}"'/p
       /^    \}$/p
       d
     }
     /^    Types\{$/,/^    \}$/d
     /^    Variants\{$/,/^    \}$/d' "${target}"
  done
}


# Keep only the minimum locale data for non-UI languages.
#
# A non-UI ("extra") language is an entry of accept_lang.list that does not
# match any entry of chrome_ui_languages.list.
#
# Globals:
#   localedatapath (read): directory with the per-language locale files.
#   langdatapath (read): directory with the per-language display-name files.
# Files read from the current directory:
#   chrome_ui_languages.list, accept_lang.list ('#' lines are ignored).
# Side effects:
#   Edits <lang>.txt in both directories in place for every extra language;
#   prints "missing <lang>" and moves on when a file does not exist.
function abridge_locale_data_for_non_ui_languages {
  local lang target extra_languages
  # Local and initially empty, so a value inherited from the environment or
  # left behind by an earlier call cannot leak into the exclusion pattern.
  local ui_languages=""

  # Word splitting of the grep output is intentional: every
  # whitespace-separated token is treated as one language code.
  for lang in $(grep -v '^#' chrome_ui_languages.list)
  do
    # Insert '|' between alternatives, but not in front of the first one.
    ui_languages="${ui_languages:+${ui_languages}|}${lang}"
  done

  # NOTE(review): the exclusion pattern is unanchored, so a UI code also
  # excludes any accept-language entry that merely contains it (e.g. 'ko'
  # excludes 'kok'). Kept as is; confirm whether that is intended.
  extra_languages=$(grep -E -v -e '^#' -e "(${ui_languages})" accept_lang.list)

  echo "Creating minimum locale data in ${localedatapath}"
  for lang in ${extra_languages}
  do
    target="${localedatapath}/${lang}.txt"
    [[ -e "${target}" ]] || { echo "missing ${lang}"; continue; }
    echo "Overwriting ${target} ..."
    # With -n only explicitly printed lines survive: everything up to the
    # "<lang>{" line, the alias, the exemplar character sets (one-line or
    # multi-line form), LocaleScript/layout, Version and the closing brace.
    sed -n -r -i \
      '1, /^'"${lang}"'\{$/p
       /^    "%%ALIAS"\{/p
       /^    AuxExemplarCharacters\{.*\}$/p
       /^    AuxExemplarCharacters\{$/, /^    \}$/p
       /^    ExemplarCharacters\{.*\}$/p
       /^    ExemplarCharacters\{$/, /^    \}$/p
       /^    (LocaleScript|layout)\{$/, /^    \}$/p
       /^    Version\{.*$/p
       /^\}$/p' "${target}"
  done

  echo "Creating minimum locale data in ${langdatapath}"
  for lang in ${extra_languages}
  do
    target="${langdatapath}/${lang}.txt"
    [[ -e "${target}" ]] || { echo "missing ${lang}"; continue; }
    echo "Overwriting ${target} ..."
    # Keep the header up to the "<lang>{" line, a Languages table reduced to
    # the one-line entry for the language itself, and the closing brace.
    sed -n -r -i \
      '1, /^'"${lang}"'\{$/p
       /^    Languages\{$/, /^    \}$/ {
         /^    Languages\{$/p
         /^        '"${lang}"'\{.*\}$/p
         /^    \}$/p
       }
       /^\}$/p' "${target}"
  done
}

# Drop historic currencies.
# TODO(jshin): Use ucurr_isAvailable in ICU to drop more currencies.
# See also http://en.wikipedia.org/wiki/List_of_circulating_currencies
#
# Globals:
#   dataroot (read): data directory; the files edited are ${dataroot}/curr/*.txt.
# Files read from the current directory:
#   currencies_to_drop.list: currency codes to remove ('#' lines are ignored).
# Side effects:
#   Edits every ${dataroot}/curr/*.txt except supplementalData.txt in place.
#   The current working directory is left unchanged.
function filter_currency_data {
  local currency file
  # Local and initially empty, so a value inherited from the environment or
  # left behind by an earlier call cannot add currencies to the drop list.
  local droplist=""

  # Word splitting of the grep output is intentional: every
  # whitespace-separated token is treated as one currency code.
  for currency in $(grep -v '^#' currencies_to_drop.list)
  do
    # Insert '|' between alternatives, but not in front of the first one.
    droplist="${droplist:+${droplist}|}${currency}"
  done
  # Nothing listed means nothing to drop.
  [[ -n "${droplist}" ]] || return 0
  droplist="(${droplist})\{"

  # Address the files through their path instead of changing directory: a
  # directory change would persist after the function returns and would
  # invalidate a relative ${dataroot} for everything that runs later.
  for file in "${dataroot}/curr"/*.txt
  do
    # An unmatched glob stays literal; skip it.
    [[ -e "${file}" ]] || continue
    [[ "${file##*/}" != 'supplementalData.txt' ]] || continue
    # Delete each block from "<code>{" down to the closing brace that sits at
    # the same eight-space indentation.
    sed -r -i '/^        '"${droplist}"'/, /^        }/ d' "${file}"
  done
}

# Remove the display names for numeric region codes other than
# 419 (Latin America) because we don't use them.
#
# Globals:
#   dataroot (read): data directory; the files edited are
#                    ${dataroot}/region/*.txt.
# Returns:
#   1 without touching anything when no such file exists, otherwise the exit
#   status of sed.
# Side effects:
#   Edits the region files in place. The current working directory is left
#   unchanged.
function filter_region_data {
  # Address the files through their path instead of changing directory: if
  # a directory change failed, sed -i would rewrite whatever *.txt files
  # happen to live in the current directory.
  local -a region_files=("${dataroot}/region"/*.txt)

  # An unmatched glob stays literal; bail out instead of handing sed a file
  # name that does not exist.
  if [[ ! -e "${region_files[0]}" ]]; then
    echo "No region data files found in ${dataroot}/region" >&2
    return 1
  fi

  # Deletes every line containing three digits followed by '{' unless the
  # first digit is 4, which is how the 419 entry survives.
  sed -i  '/[0-35-9][0-9][0-9]{/ d' "${region_files[@]}"
}


# Strip the leading part of the zoneStrings table from every zone file.
#
# In each ${dataroot}/zone/*.txt except root.txt, the lines between the
# "zoneStrings" line and the first '"meta:' entry are deleted; those two
# boundary lines themselves are kept.
#
# Globals:
#   dataroot (read): data directory.
# Side effects:
#   Edits the zone files in place. The current working directory is left
#   unchanged.
function remove_exemplar_cities {
  local file

  # Address the files through their path instead of changing directory, so
  # that a relative ${dataroot} stays valid for whatever runs afterwards.
  for file in "${dataroot}/zone"/*.txt
  do
    # An unmatched glob stays literal; skip it.
    [[ -e "${file}" ]] || continue
    [[ "${file##*/}" != 'root.txt' ]] || continue
    # Inside the range the two boundary lines are printed explicitly and
    # then 'd' discards every line without the automatic print.
    sed -i '/^    zoneStrings/, /^        "meta:/ {
      /^    zoneStrings/ p
      /^        "meta:/ p
      d
    }' "${file}"
  done
}

# Keep only duration and compound in units* sections.
#
# Globals:
#   dataroot (read): data directory; the files edited are
#                    ${dataroot}/locales/*.txt.
# Side effects:
#   Edits every locale file in place and prints one "Overwriting" line per
#   file.
function filter_locale_data {
  local file

  # The directory part is quoted so that a data path containing whitespace
  # is not split into several words before the glob is expanded.
  for file in "${dataroot}/locales"/*.txt
  do
    # An unmatched glob stays literal; skip it.
    [[ -e "${file}" ]] || continue
    echo "Overwriting ${file} ..."
    # Inside each units / unitsNarrow / unitsShort table only the opening
    # line, the duration and compound sub-tables and the closing line are
    # printed; the final 'd' discards every line of the table without the
    # automatic print.
    sed -r -i \
      '/^    units(|Narrow|Short)\{$/, /^    \}$/ {
         /^    units(|Narrow|Short)\{$/ p
         /^        (duration|compound)\{$/, /^        \}$/ p
         /^    \}$/ p
         d
       }' "${file}"
  done
}

# big5han and gb2312han collation do not make any sense and nobody uses them.
#
# Globals:
#   dataroot (read): data directory; the file edited is
#                    ${dataroot}/coll/zh.txt.
# Side effects:
#   Deletes the big5han and gb2312han blocks from that file in place and
#   prints two progress lines.
function remove_legacy_chinese_codepoint_collation {
  # Local so the path does not overwrite a 'target' used by the caller.
  local target="${dataroot}/coll/zh.txt"

  echo "Removing Big5 / GB2312 collation data from Chinese locale"
  echo "Overwriting ${target}"
  # Each block runs from its "<name>han{" line to the closing brace at the
  # same eight-space indentation. The path is quoted so that a data
  # directory containing whitespace is passed to sed as a single argument.
  sed -r -i '/^        (big5|gb2312)han\{$/,/^        \}$/ d' "${target}"
}

# Locate the data directory relative to this script and resolve it to an
# absolute path, so that ${dataroot} stays valid whatever the current
# working directory is at the moment a function uses it. CDPATH is cleared
# for the lookup so that cd neither searches elsewhere nor prints the
# directory it picked.
if ! dataroot="$(CDPATH='' cd -- "$(dirname -- "$0")/../source/data" && pwd)"; then
  echo "Cannot find the data directory relative to $0" >&2
  exit 1
fi
localedatapath="${dataroot}/locales"
langdatapath="${dataroot}/lang"



filter_display_language_names
abridge_locale_data_for_non_ui_languages
filter_currency_data
filter_region_data
remove_legacy_chinese_codepoint_collation
filter_locale_data

# Chromium OS needs exemplar cities for timezones, but not Chromium.
# It'll save 400kB (uncompressed), but the size difference in
# 7z compressed installer is <= 100kB.
# TODO(jshin): Make separate data files for CrOS and Chromium.
#remove_exemplar_cities