1bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert#/**
2bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert# *******************************************************************************
32d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert# * Copyright (C) 2016 and later: Unicode, Inc. and others.                     *
42d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert# * License & terms of use: http://www.unicode.org/copyright.html#License       *
52d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert# *******************************************************************************
62d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert# *******************************************************************************
7bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert# * Copyright (C) 2002-2004, International Business Machines Corporation and    *
8bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert# * others. All Rights Reserved.                                                *
9bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert# *******************************************************************************
10bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert# */
11bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert@rem = '--*-Perl-*--
12bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert@echo off
13bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubertif "%OS%" == "Windows_NT" goto WinNT
14bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubertperl -W -x -S "%0" %1 %2 %3 %4 %5 %6 %7 %8 %9
15bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubertgoto endofperl
16bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert:WinNT
17bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubertperl -W -x -S "%0" %*
18bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubertif NOT "%COMSPEC%" == "%SystemRoot%\system32\cmd.exe" goto endofperl
19bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubertif %errorlevel% == 9009 echo You do not have Perl in your PATH.
20bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubertgoto endofperl
21bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert@rem ';
#!perl
#line 24
24bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
25bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert# This perl script updates the filters in the transliterator index file.
26bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert# It does so in a dumb way:
27bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert#
28bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert#  Latin-X   NFD lower
29bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert#  X-Latin   NFD
30bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert#
31bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert# For transliterators using NFKD, or not using Lower in this way, you
32bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert# will have to hand-edit the index file.
33bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert#
34bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert# This script writes a new index file.  The new file has to then be
35bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert# hand-edited and checked before use; it contains comments indicating
36bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert# old lines that were replaced.
37bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert#
38bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert# Alan Liu 11/29/01
39bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
40bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubertuse Getopt::Long;
41bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
# Directory holding Transliterator_index.txt, relative to the current
# working directory.
my $DIR = "../../text/resources";
# Classpath handed to java when computing source sets.
my $CLASSES = "../../../../../classes";

#GetOptions('dir=s' => \$DIR,
#           'id=s' => \$ID,
#           '<>' => \&usage) || die;

#usage() if (@ARGV);

#$ID =~ s/-/_/;

# usage() is commented out below, so calling it here would abort with
# "Undefined subroutine &main::usage called" instead of a clean exit.
# Fail directly with the real diagnostic.
if (! -d $DIR) {
    die "$DIR is not a directory\n";
}

#sub usage {
#    my $me = $0;
#    $me =~ s|.+[/\\]||;
#    print "Usage: $me [-dir <dir>] [-id <id>]\n";
#    print " --dir <dir> Specify the directory containing the\n";
#    print "             Transliterator_*.txt files\n";
#    print " --id <id>   Specify a single ID to transform, e.g.\n";
#    print "             Fullwidth-Halfwidth\n";
#    die;
#}

convertIndex();
69bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
70bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert######################################################################
# Rewrite the filter of every NF* alias line in the index file
72bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert# Assume lines are of the form:
73bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert#   <ID>:alias:<FILTER>;<REMAINDER>
74bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert# <REMAINDER> can be
75bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert#   Lower;NFX;...
76bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert#   NFX;Lower;...
77bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert#   NFX;...
sub convertIndex {
    # Reads $DIR/Transliterator_index.txt and writes a sibling file with
    # ".new" appended.  Each line matching
    #   <ID>:alias:[<FILTER>];NF*;...
    # gets its filter replaced by the set returned from getSourceSet();
    # every other line is copied through unchanged.
    # Takes no arguments, returns nothing useful; dies if either file
    # cannot be opened or the output cannot be flushed.
    my $in_name  = "Transliterator_index.txt";
    my $out_name = "$in_name.new";
    my $in_path  = "$DIR/$in_name";
    my $out_path = "$DIR/$out_name";

    open(my $in,  '<', $in_path)  or die "Cannot open $in_path for reading: $!\n";
    open(my $out, '>', $out_path) or die "Cannot open $out_path for writing: $!\n";

    while (my $line = <$in>) {
        # Look for lines that are aliases with NF*
        if ($line =~ /^([^:]+):alias:(\[.+?);\s*((NF[^\s]*?)\s*;.+)$/i) {
            # Copy the captures immediately; the second match below
            # overwrites $1..$4.
            my ($id, $oldset, $remainder, $NFXD) = ($1, $2, $3, $4);
            my $lower = '';
            # Check for Lower
            # If it comes before NF* then the filter ends earlier, so the
            # remainder has to start at Lower rather than at NF*.
            if ($line =~ /^([^:]+):alias:(\[.+?);\s*(Lower\s*;.+)$/i) {
                $lower = 'lower';
                if (length($2) < length($oldset)) {
                    $oldset = $2;
                    $remainder = $3;
                }
            }
            print STDERR "$id $NFXD $lower\n";
            my $set = getSourceSet($id, $NFXD, $lower);
            $line = "$id:alias:$set;$remainder\n";
        }
        print {$out} $line;
    }

    close($in);
    # Buffered write errors only surface at close time.
    close($out) or die "Error closing $out_path: $!\n";
    print STDERR "Wrote $out_path\n";
}
112bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
113bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert######################################################################
114bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert# Get the source set (call out to Java), optionally with a closure.
sub getSourceSet {
    # Arguments:
    #   $ID    - transliterator ID taken from the index line
    #   $NFXD  - the NF* token found on that line
    #   $lower - 'lower' or the empty string
    # Returns the output of com.ibm.tools.translit.genIndexFilters with
    # one trailing newline removed, or '' if java could not be started.
    my ($ID, $NFXD, $lower) = @_;

    my $cmd = "java -classpath $CLASSES com.ibm.tools.translit.genIndexFilters $ID $NFXD $lower";
    my $set = `$cmd`;

    # Without this check a missing or failing java would silently put an
    # empty filter into the generated index file.
    if (!defined($set) || $? != 0) {
        warn "Warning: '$cmd' failed (status $?); filter for $ID may be wrong\n";
        $set = '' unless defined $set;
    }

    chomp($set);
    return $set;
}
123bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert
124bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert__END__
125bd1cbb618dcaa1ac6ba7c77dece35cb79593a5d7Fredrik Roubert:endofperl
126