1#!/usr/bin/perl
2#*
3#*******************************************************************************
4#*   Copyright (C) 2006, International Business Machines
5#*   Corporation and others.  All Rights Reserved.
6#*******************************************************************************
7#*
8#*   file name:  genspva.pl
9#*   encoding:   US-ASCII
10#*   tab size:   8 (not used)
11#*   indentation:4
12#*
13#*   Created by: Ram Viswanadha
14#*
15#* This file filters iso15924-utf8-<date>.txt
16#*
17
18use File::Find;
19use File::Basename;
20use IO::File;
21use Cwd;
22use File::Copy;
23use Getopt::Long;
24use File::Path;
25use File::Copy;
26
# Run the program: all work (option parsing, reading the input files and
# writing the output file) is done by main(), defined further down.
main();
29
30#---------------------------------------------------------------------
31# The main program
32
# Entry point: writes <destdir>/SyntheticPropertyValueAliases.txt.
#
# Command-line options (read from @ARGV via GetOptions):
#   --destdir=<dir>     directory the output file is created in (required)
#   --iso15924=<file>   ISO 15924 registry file to filter (required)
#   --prop=<file>       property value aliases file to check against (required)
#   --code-start=<int>  optional; when given, a UScriptCode enum snippet is
#                       also printed to STDOUT, numbered from this value
#
# For every non-comment line of the ISO 15924 file, the first ';'-separated
# field is taken as the script code.  A "sc ; <code> ; <code>" line is written
# for each code that has no "sc ; <code>" entry in the property file and that
# is not a private-use code (Qa[ab][a-z]).
#
# Calls usage() (which terminates the script) when the command line is
# invalid or a required option is missing; dies when a file cannot be opened
# or the output file cannot be closed.
sub main() {
    my ($destdir, $iso, $prop, $code);

    # Leading dashes are optional in Getopt::Long option specs; "=i" makes
    # sure the value that is incremented below really is an integer.
    GetOptions(
        "destdir=s"    => \$destdir,
        "iso15924=s"   => \$iso,
        "prop=s"       => \$prop,
        "code-start=i" => \$code,
    ) or usage();
    usage() unless defined $destdir;
    usage() unless defined $iso;
    usage() unless defined $prop;

    my $outfile = "$destdir/SyntheticPropertyValueAliases.txt";
    my $propFH = IO::File->new($prop, "r")
            or die "could not open the file $prop for reading: $! \n";
    my $isoFH = IO::File->new($iso, "r")
            or die "could not open the file $iso for reading: $! \n";
    my $outFH = IO::File->new($outfile, "w")
            or die "could not open the file $outfile for writing: $! \n";

    # Keep only the script ("sc") lines of the property file.
    my @propLines;
    while (my $propLine = <$propFH>) {
        push(@propLines, $propLine) if $propLine =~ /sc ; /;
    }

    printHeader($outFH);
    if (defined $code) {
        print "Please add the following to UScriptCode enum in uscript.h.\n";
        print "#ifndef U_HIDE_DRAFT_API\n";
    }

    while (my $isoLine = <$isoFH>) {
        next if $isoLine =~ /^#/;        # skip comment lines
        next unless $isoLine =~ /\S/;    # skip blank lines

        # Only the script code (1st field) and the name (3rd field) are used.
        my ($script, undef, $name) = split(/;/, $isoLine, 4);
        $script =~ s/^\s+|\s+$//g;
        next if $script eq '';

        # e.g. "sc ; Arab"
        my $alias = "sc ; $script";

        # Search the property lines to make sure that this script code is
        # not already encoded in Unicode.  index() compares literally, so
        # the script code is never interpreted as a regular expression.
        next if grep { index($_, $alias) >= 0 } @propLines;

        # Ignore private use codes.
        next if $script =~ /Qa[ab][a-z]/;

        # The code doubles as short and long name.
        print $outFH "$alias ; $script \n";

        # Print the matching enum constant to the console.
        if (defined $code) {
            # Names that are missing, or contain whitespace, '(', ',' or
            # non-ASCII bytes, cannot be turned into an identifier; use the
            # script code instead.
            if (!defined $name || $name eq '' || $name =~ /[(\s,\x80-\xFF]/) {
                $name = $script;
            }
            $name =~ s/-/_/g;

            my $scriptcode = "USCRIPT_" . uc($name);
            print "      $scriptcode          = $code, /* $script */\n";
            $code++;
        }
    }

    if (defined $code) {
        print "#endif /* U_HIDE_DRAFT_API */\n";
    }

    close($isoFH);
    close($propFH);
    # Buffered write errors only surface when the handle is closed.
    close($outFH)
            or die "could not close the file $outfile: $! \n";
}
113#-----------------------------------------------------------------------
# Prints the copyright banner and the explanatory comments that start the
# generated SyntheticPropertyValueAliases.txt file.
#
# Parameters:
#   $outFH - file handle the header is printed to
#
# The copyright range ends with the current year (local time).
sub printHeader{
    my ($outFH) = @_;
    # localtime reports the year as an offset from 1900.
    my $year = (localtime)[5] + 1900;
    #We will print our copyright here + warnings
print $outFH <<END_HEADER_COMMENT;
########################################################################
# Copyright (c) 2006-$year, International Business Machines
# Corporation and others.  All Rights Reserved.
########################################################################
#   file name:      SyntheticPropertyValueAliases.txt
#   encoding:       US-ASCII
#   tab size:       8 (not used)
#   indentation:    4
#   created by:     genspva.pl
########################################################################

# This file follows the format of PropertyValueAliases.txt
# It contains synthetic property value aliases not present
# in the UCD.  Unlike PropertyValueAliases.txt, it should
# NOT contain a version number.

########################################################################
#  THIS FILE IS MACHINE-GENERATED, DON'T PLAY WITH IT IF YOU DON'T KNOW
#  WHAT YOU ARE DOING, OTHERWISE VERY BAD THINGS WILL HAPPEN!
########################################################################

# set the same names as short and long names to fit the syntax without
# inventing names that we would have to support forever

# Script (sc)

END_HEADER_COMMENT
}
148#-----------------------------------------------------------------------
# Prints the command-line help and terminates the script.
#
# This is invoked when the command line cannot be used (e.g. a required
# option is missing), so the text goes to STDERR and the exit status is
# non-zero to let calling scripts detect the failure.
sub usage {
    print STDERR <<"END";
Usage:
genspva.pl
Options:
        --destdir=<directory>
        --iso15924=<file name>
        --prop=<PropertyValueAliases.txt>
        --code-start=<first enum value> (optional)
e.g.: genspva.pl  --destdir=<icu>/source/tools/genpname --iso15924=iso15924-utf8-20041025.txt --prop=<icu>/source/data/unidata/PropertyValueAliases.txt --code-start=60
END
    exit(1);
}