#!/bin/bash
# Copyright (c) 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.


# Remove display names for languages that are not listed in the accept-language
# list of Chromium.
#
# Globals:
#   langdatapath         (read)    directory holding the <lang>.txt files
#   ACCEPT_LANG_PATTERN  (written) "(<code>|<code>|...)[^a-z]" built from
#                                  accept_lang.list
# Inputs:
#   accept_lang.list and chrome_ui_languages.list are read from the current
#   working directory; lines starting with '#' are skipped.
# Effects:
#   For every language in chrome_ui_languages.list, rewrites
#   ${langdatapath}/<lang>.txt in place: the Keys, Types and Variants sections
#   are deleted and the Languages section keeps only the entries whose name
#   matches ACCEPT_LANG_PATTERN.
# Returns:
#   1 when accept_lang.list yields no language codes.
#
# NOTE(review): the sed patterns assume four spaces of indentation per nesting
# level in the data files (4 for sections, 8 for entries) -- confirm against
# the ICU .txt sources.
function filter_display_language_names {
  local lang target
  local alternation=""

  # Build the alternation from scratch so a value left in the environment or
  # by an earlier call cannot leak into the pattern.
  for lang in $(grep -v '^#' accept_lang.list); do
    alternation="${alternation:+${alternation}|}${lang}"
  done

  # An empty alternation would give "()[^a-z]", which keeps only entries that
  # start with a non-lowercase character, i.e. it would drop the language
  # names instead of filtering them.
  if [[ -z "${alternation}" ]]; then
    echo "accept_lang.list is missing or lists no languages" >&2
    return 1
  fi

  # The trailing [^a-z] requires that the character following the code is not
  # a lowercase letter.
  ACCEPT_LANG_PATTERN="(${alternation})[^a-z]"

  echo "Filtering out display names for non-A-L languages ${langdatapath}"
  for lang in $(grep -v '^#' chrome_ui_languages.list); do
    target="${langdatapath}/${lang}.txt"
    [ -e "${target}" ] || { echo "missing ${lang}"; continue; }
    echo "Overwriting ${target} ..."
    # Inside the Languages section the header, the accepted entries and the
    # closing brace are printed explicitly; the final "d" drops every line of
    # the section without auto-printing it.
    sed -r -i \
      '/^    Keys\{$/,/^    \}$/d
       /^    Languages\{$/, /^    \}$/ {
         /^    Languages\{$/p
         /^        '"${ACCEPT_LANG_PATTERN}"'/p
         /^    \}$/p
         d
       }
       /^    Types\{$/,/^    \}$/d
       /^    Variants\{$/,/^    \}$/d' "${target}"
  done
}


# Keep only the minimum locale data for non-UI languages.
#
# Globals:
#   localedatapath   (read)    directory with the locale <lang>.txt files
#   langdatapath     (read)    directory with the language-name <lang>.txt files
#   UI_LANGUAGES     (written) "<code>|<code>|..." from chrome_ui_languages.list
#   EXTRA_LANGUAGES  (written) accept_lang.list entries that match none of the
#                              UI language codes
# Inputs:
#   chrome_ui_languages.list and accept_lang.list are read from the current
#   working directory; lines starting with '#' are skipped.
# Effects:
#   For each extra language, rewrites its file under localedatapath so that
#   only the header, "%%ALIAS", (Aux)ExemplarCharacters, LocaleScript, layout
#   and Version survive, and its file under langdatapath so that only the
#   language's own entry in the Languages section survives. Missing files are
#   reported and skipped.
# Returns:
#   1 when chrome_ui_languages.list yields no language codes.
#
# NOTE(review): the UI-language filter is an unanchored match, so an accept
# language that merely contains a UI code is also treated as a UI language --
# confirm this is intended.
# NOTE(review): the sed patterns assume four spaces of indentation per nesting
# level in the data files -- confirm against the ICU .txt sources.
function abridge_locale_data_for_non_ui_languages {
  local lang target
  local ui_alternation=""

  # Start from an empty string so an inherited or previously computed
  # UI_LANGUAGES cannot widen the exclusion pattern.
  for lang in $(grep -v '^#' chrome_ui_languages.list); do
    ui_alternation="${ui_alternation:+${ui_alternation}|}${lang}"
  done
  if [[ -z "${ui_alternation}" ]]; then
    echo "chrome_ui_languages.list is missing or lists no languages" >&2
    return 1
  fi
  UI_LANGUAGES="${ui_alternation}"

  EXTRA_LANGUAGES=$(grep -E -v -e '^#' -e "(${UI_LANGUAGES})" accept_lang.list)

  echo "Creating minimum locale data in ${localedatapath}"
  for lang in ${EXTRA_LANGUAGES}; do
    target="${localedatapath}/${lang}.txt"
    [ -e "${target}" ] || { echo "missing ${lang}"; continue; }
    echo "Overwriting ${target} ..."
    # -n: nothing is printed unless a rule below selects it.
    sed -n -r -i \
      '1, /^'"${lang}"'\{$/p
       /^    "%%ALIAS"\{/p
       /^    AuxExemplarCharacters\{.*\}$/p
       /^    AuxExemplarCharacters\{$/, /^    \}$/p
       /^    ExemplarCharacters\{.*\}$/p
       /^    ExemplarCharacters\{$/, /^    \}$/p
       /^    (LocaleScript|layout)\{$/, /^    \}$/p
       /^    Version\{.*$/p
       /^\}$/p' "${target}"
  done

  echo "Creating minimum locale data in ${langdatapath}"
  for lang in ${EXTRA_LANGUAGES}; do
    target="${langdatapath}/${lang}.txt"
    [ -e "${target}" ] || { echo "missing ${lang}"; continue; }
    echo "Overwriting ${target} ..."
    sed -n -r -i \
      '1, /^'"${lang}"'\{$/p
       /^    Languages\{$/, /^    \}$/ {
         /^    Languages\{$/p
         /^        '"${lang}"'\{.*\}$/p
         /^    \}$/p
       }
       /^\}$/p' "${target}"
  done
}

# Drop historic currencies.
# TODO(jshin): Use ucurr_isAvailable in ICU to drop more currencies.
# See also http://en.wikipedia.org/wiki/List_of_circulating_currencies
#
# Globals:
#   dataroot  (read)    root of the data tree; files are taken from
#                       ${dataroot}/curr
#   DROPLIST  (written) "(<code>|<code>|...)\{" built from
#                       currencies_to_drop.list
# Inputs:
#   currencies_to_drop.list is read from the current working directory; lines
#   starting with '#' are skipped.
# Effects:
#   In every ${dataroot}/curr/*.txt except supplementalData.txt, deletes each
#   range that starts at an entry line for a listed currency and ends at the
#   next closing brace of the same indentation. The working directory is left
#   unchanged, so a relative dataroot stays valid for the steps that follow.
# Returns:
#   1 when currencies_to_drop.list yields no currency codes.
#
# NOTE(review): the sed patterns assume currency entries are indented by eight
# spaces in the data files -- confirm against the ICU .txt sources.
function filter_currency_data {
  local currency file
  local codes=""

  # Build the alternation from scratch so repeated calls or an inherited
  # DROPLIST cannot nest the pattern inside itself.
  for currency in $(grep -v '^#' currencies_to_drop.list); do
    codes="${codes:+${codes}|}${currency}"
  done
  if [[ -z "${codes}" ]]; then
    echo "currencies_to_drop.list is missing or lists no currencies" >&2
    return 1
  fi
  DROPLIST="(${codes})\{"

  for file in "${dataroot}/curr"/*.txt; do
    # An unmatched glob stays literal; skip it instead of passing it to sed.
    [[ -e "${file}" ]] || continue
    if [[ "${file##*/}" != 'supplementalData.txt' ]]; then
      sed -r -i '/^        '"${DROPLIST}"'/, /^        }/ d' "${file}"
    fi
  done
}

# Remove the display names for numeric region codes other than
# 419 (Latin America) because we don't use them.
# Globals: dataroot (read). Edits ${dataroot}/region/*.txt in place, deleting
# every line that contains a three-digit code whose first digit is not 4,
# immediately followed by '{'. The working directory is left unchanged.
# Returns 1 when no region file is found.
function filter_region_data {
  local -a region_files=("${dataroot}/region"/*.txt)

  # An unmatched glob stays literal; never hand that to "sed -i".
  if [[ ! -e "${region_files[0]}" ]]; then
    echo "no region data found in ${dataroot}/region" >&2
    return 1
  fi
  sed -i '/[0-35-9][0-9][0-9]{/ d' "${region_files[@]}"
}



# In every ${dataroot}/zone/*.txt except root.txt, deletes the lines between
# the zoneStrings header and the first "meta:" entry, keeping both boundary
# lines. Globals: dataroot (read). The working directory is left unchanged.
#
# NOTE(review): the patterns assume 4 spaces before zoneStrings and 8 before
# the "meta:" entries -- confirm against the ICU .txt sources.
function remove_exemplar_cities {
  local zone_file

  for zone_file in "${dataroot}/zone"/*.txt; do
    [[ -e "${zone_file}" ]] || continue
    if [[ "${zone_file##*/}" != 'root.txt' ]]; then
      # Boundary lines are printed explicitly, then "d" drops every line of
      # the range without auto-printing it.
      sed -i '/^    zoneStrings/, /^        "meta:/ {
        /^    zoneStrings/ p
        /^        "meta:/ p
        d
      }' "${zone_file}"
    fi
  done
}

# Keep only duration and compound in units* sections.
#
# Globals: dataroot (read). Rewrites every ${dataroot}/locales/*.txt in place:
# inside the units, unitsNarrow and unitsShort sections only the section
# header, the duration and compound sub-sections and the closing brace remain.
#
# NOTE(review): the patterns assume 4 spaces before the units* sections and 8
# before their sub-sections -- confirm against the ICU .txt sources.
function filter_locale_data {
  local locale_file

  for locale_file in "${dataroot}/locales"/*.txt; do
    [[ -e "${locale_file}" ]] || continue
    echo "Overwriting ${locale_file} ..."
    sed -r -i \
      '/^    units(|Narrow|Short)\{$/, /^    \}$/ {
         /^    units(|Narrow|Short)\{$/ p
         /^        (duration|compound)\{$/, /^        \}$/ p
         /^    \}$/ p
         d
       }' "${locale_file}"
  done
}

# big5han and gb2312han collation do not make any sense and nobody uses them.
#
# Globals: dataroot (read). Deletes the big5han and gb2312han sections from
# ${dataroot}/coll/zh.txt in place. Returns 1 when that file does not exist.
#
# NOTE(review): the pattern assumes the collation sections are indented by
# eight spaces -- confirm against the ICU .txt sources.
function remove_legacy_chinese_codepoint_collation {
  local target="${dataroot}/coll/zh.txt"

  echo "Removing Big5 / GB2312 collation data from Chinese locale"
  if [[ ! -e "${target}" ]]; then
    echo "missing ${target}" >&2
    return 1
  fi
  echo "Overwriting ${target}"
  sed -r -i '/^        (big5|gb2312)han\{$/,/^        \}$/ d' "${target}"
}

# Resolve the script directory to an absolute path so that the data paths do
# not depend on the working directory the script was started from.
script_dir="$(CDPATH= cd -- "$(dirname -- "$0")" && pwd)" || {
  echo "cannot resolve the directory of $0" >&2
  exit 1
}
dataroot="${script_dir}/../source/data"
localedatapath="${dataroot}/locales"
langdatapath="${dataroot}/lang"



# Trimming steps, run in this order. Each step rewrites data files in place.
filter_display_language_names
abridge_locale_data_for_non_ui_languages
filter_currency_data
filter_region_data
remove_legacy_chinese_codepoint_collation
filter_locale_data

# Chromium OS needs exemplar cities for timezones, but Chromium does not.
# Removing them would save 400kB (uncompressed), but the size difference in
# the 7z compressed installer is <= 100kB.
# TODO(jshin): Make separate data files for CrOS and Chromium.
#remove_exemplar_cities