#/**
# *******************************************************************************
# * Copyright (C) 2016 and later: Unicode, Inc. and others. *
# * License & terms of use: http://www.unicode.org/copyright.html#License *
# *******************************************************************************
# *******************************************************************************
# * Copyright (C) 2002-2004, International Business Machines Corporation and *
# * others. All Rights Reserved. *
# *******************************************************************************
# */
@rem = '--*-Perl-*--
@echo off
if "%OS%" == "Windows_NT" goto WinNT
perl -W -x -S "%0" %1 %2 %3 %4 %5 %6 %7 %8 %9
goto endofperl
:WinNT
perl -W -x -S "%0" %*
if NOT "%COMSPEC%" == "%SystemRoot%\system32\cmd.exe" goto endofperl
if %errorlevel% == 9009 echo You do not have Perl in your PATH.
goto endofperl
@rem ';
#!perl
#line 24

# The block above is a batch/Perl polyglot launcher: cmd.exe runs the
# "perl -W -x -S" lines, and "perl -x" skips everything before "#!perl".
# The "#line" directive must name the number of the line that follows it so
# that Perl diagnostics point at the right line of this file.
#
# This perl script updates the filters in the transliterator index file.
# It does so in a dumb way:
#
#   Latin-X  NFD lower
#   X-Latin  NFD
#
# For transliterators using NFKD, or not using Lower in this way, you
# will have to hand-edit the index file.
#
# This script writes a new index file.  The new file has to then be
# hand-edited and checked before use.  Replaced lines are not annotated in
# the output, so diff the new file against the original to review them.
#
# Alan Liu 11/29/01

use Getopt::Long;

# Directory holding Transliterator_index.txt, and the classpath handed to the
# Java helper.  Both are relative paths, so they are resolved against the
# current working directory.
my $DIR = "../../text/resources";
my $CLASSES = "../../../../../classes";

# Command-line handling is currently disabled; the paths above are fixed.
#GetOptions('dir=s' => \$DIR,
#           'id=s' => \$ID,
#           '<>' => \&usage) || die;

#usage() if (@ARGV);

#$ID =~ s/-/_/;
if (! -d $DIR) {
    print STDERR "$DIR is not a directory\n";
    usage();
}

######################################################################
# Print a short description of how to run the script on STDERR, then die.
# Called when the resource directory cannot be found.
sub usage {
    my $me = $0;
    $me =~ s|.+[/\\]||;    # keep only the file name (either slash style)
    print STDERR "Usage: $me\n";
    print STDERR "  Takes no options.  Reads Transliterator_index.txt from\n";
    print STDERR "    $DIR\n";
    print STDERR "  (relative to the current directory) and writes\n";
    print STDERR "  Transliterator_index.txt.new into the same directory.\n";
    die "$me: aborting\n";
}

convertIndex();

######################################################################
# Rewrite the filter of every alias line in the index file that uses NF*.
# Assume lines are of the form:
#   <ID>:alias:<FILTER>;<REMAINDER>
# <REMAINDER> can be
#        Lower;NFX;...
#        NFX;Lower;...
#        NFX;...
#
# Reads  $DIR/Transliterator_index.txt
# Writes $DIR/Transliterator_index.txt.new
#
# Every matching alias line gets its <FILTER> replaced by the set returned
# from getSourceSet(); all other lines are copied through unchanged.  If the
# set cannot be computed for a line, that line is copied through unchanged
# and a warning is printed, so the output never contains an empty filter
# caused by a failed helper call.
# Dies if either file cannot be opened or the output cannot be flushed.
sub convertIndex {
    my $inName  = "Transliterator_index.txt";
    my $outName = "$inName.new";
    my $inPath  = "$DIR/$inName";
    my $outPath = "$DIR/$outName";

    open(my $in, '<', $inPath)
        or die "Cannot open $inPath for reading: $!\n";
    open(my $out, '>', $outPath)
        or die "Cannot open $outPath for writing: $!\n";

    my $failures = 0;
    while (my $line = <$in>) {
        # Look for lines that are aliases with NF*
        if (my ($id, $oldset, $remainder, $NFXD) =
                $line =~ /^([^:]+):alias:(\[.+?);\s*((NF[^\s]*?)\s*;.+)$/i) {
            my $lower = '';
            # Check for Lower
            # If it comes before NF* then the filter captured above also
            # swallowed "Lower", so the remainder must restart at Lower.
            if ($line =~ /^([^:]+):alias:(\[.+?);\s*(Lower\s*;.+)$/i) {
                $lower = 'lower';
                if (length($2) < length($oldset)) {
                    $remainder = $3;
                }
            }

            print STDERR "$id $NFXD $lower\n";
            my $set = getSourceSet($id, $NFXD, $lower);
            if (defined $set) {
                $line = "$id:alias:$set;$remainder\n";
            }
            else {
                # Keep the original line rather than emit a broken filter.
                ++$failures;
                print STDERR "  no source set for $id; line left unchanged\n";
            }
        }
        print {$out} $line;
    }

    close($in);
    # Buffered write errors only surface at close time.
    close($out) or die "Error closing $outPath: $!\n";
    print STDERR "Wrote $DIR/$outName\n";
    print STDERR "$failures line(s) could not be updated\n" if $failures;
}

######################################################################
# Get the source set (call out to Java), optionally with a closure.
#
# Arguments, all passed straight through as command-line arguments of
# com.ibm.tools.translit.genIndexFilters:
#   $ID    - transliterator ID taken from the index line
#   $NFXD  - the NF* token found on the index line
#   $lower - 'lower' or '' (presumably requests a lowercase closure;
#            confirm against genIndexFilters)
#
# Returns the tool's standard output with the trailing newline removed, or
# undef (after a warning) if the tool could not be run or exited non-zero.
#
# NOTE: the arguments are interpolated into a shell command line, so they
# must not contain shell metacharacters.
sub getSourceSet {
    my ($ID, $NFXD, $lower) = @_;

    my $set = `java -classpath $CLASSES com.ibm.tools.translit.genIndexFilters $ID $NFXD $lower`;
    if (!defined($set) || $? != 0) {
        warn "genIndexFilters failed for '$ID' (wait status $?)\n";
        return;
    }
    chomp($set);
    return $set;
}

__END__
:endofperl