1
2use Getopt::Long;
3
# Command-line options:
#   -pht FILE   phone table to load up front (optional; when omitted, the
#               table path is derived later from the input's LANG header
#               and the ESRSDK environment variable)
#   -i   FILE   input dictionary to convert (required)
#   -ok  FILE   output dictionary to write (required)
my ($phtfile, $oklongfile, $okfile);

# GetOptions returns false when the command line could not be parsed (it has
# already printed the details to STDERR), so stop instead of running with
# whatever options happened to be recognised.
GetOptions("pht=s" => \$phtfile,
           "i=s"   => \$oklongfile,
           "ok=s"  => \$okfile)
    or die "Error in command line arguments\n";

# Both file names are needed further down; fail here with a usage line
# rather than with an open() error on an empty file name.
die "usage: $0 [-pht phtfile] -i oklongfile -ok okfile\n"
    unless defined $oklongfile && defined $okfile;

load_phtfile($phtfile) if defined $phtfile;
9
# load_phtfile($phtfile)
#
# Reads a phone table file and records its lph <-> sph pairs in three
# package-level hashes:
#   %sph_for_lph - lph => sph
#   %lph_for_sph - sph => lph
#   %lphhash     - lph => number of times that lph was registered
#
# The first line of the file is a header and is discarded.  Every other
# line is split on whitespace; the second field is taken as the lph and the
# third as the sph.  The first field and anything after the third are
# ignored (a line that starts with whitespace yields an empty first field).
# Lines without both an lph and an sph are skipped, so blank lines no
# longer create an empty-string key with an undefined value.
#
# After the file has been read, two built-in pairs are always registered,
# overriding any file entry with the same key: "&" <-> "&" and
# "iwt" <-> "#".
#
# Dies if the file cannot be opened.  Prints a progress note to STDERR.
# Does not touch the caller's $_, so it is safe to call from inside a
# while (<FH>) loop.
sub load_phtfile
{
    my ($phtfile) = @_;

    # Three-argument open with a lexical handle: the file name can never be
    # interpreted as an open mode or a pipe, and the handle is private.
    open(my $pht, '<', $phtfile)
        or die "error opening phtfile $phtfile: $!\n";
    print STDERR "using phtfile $phtfile\n";

    # Records one lph/sph pair in all three lookup tables.
    my $register = sub {
        my ($lph, $sph) = @_;
        $lph_for_sph{$sph} = $lph;
        $sph_for_lph{$lph} = $sph;
        $lphhash{$lph}++;
    };

    my $header = <$pht>;    # header line, read and discarded

    # A lexical line variable keeps the caller's $_ intact.
    while (my $line = <$pht>) {
        $line =~ s/\s+$//;
        my (undef, $lph, $sph) = split(/\s+/, $line);
        next unless defined $lph && defined $sph;
        $register->($lph, $sph);
    }
    close($pht);

    # Built-in pairs that are always present, whatever the file contained.
    $register->('&',   '&');
    $register->('iwt', '#');

    return;
}
33
# Convert the input dictionary ($oklongfile) into the output dictionary
# ($okfile).
#
# Input lines are either
#   * a header of the form "LANG = EN-US", copied to the output unchanged, or
#   * "word<TAB>lph lph ...", written out as "word " followed by the
#     corresponding sph symbols concatenated without separators.
#
# Three-argument open with lexical handles keeps the file names from being
# interpreted as open modes or pipes.
open(my $in, '<', $oklongfile)
    or die "error opening okfile $oklongfile: $!\n";
open(my $out, '>', $okfile)
    or die "error opening output dict $okfile: $!\n";

while (my $line = <$in>) {
    $line =~ s/\s+$//;

    if ($line =~ /^LANG\s*=\s*(\S+)/) {    # e.g. "LANG = EN-US"
        # No phone table was given on the command line: derive its path
        # from the language name (lower-cased, "-" replaced by ".") and
        # the ESRSDK environment variable.
        my $language = lc($1);
        $language =~ tr/-/./;
        if (!defined $phtfile) {
            die "Error: ESRSDK is not defined\n" if !defined $ENV{ESRSDK};
            $phtfile = "$ENV{ESRSDK}/config/$language/models/generic.pht";
            load_phtfile($phtfile);
        }
        print {$out} "$line\n";
        next;
    }

    $line =~ s/^\s+//;
    if ($line =~ /\#\#/) {
        # NOTE(review): $skip_funnies is not set anywhere in the visible
        # code, so "##" annotations are always stripped and the entry is
        # kept -- confirm whether a command-line switch was intended.
        next if $skip_funnies;
        $line =~ s/\#\#.*$//;
    }

    # Nothing left (blank line, or a line that was only a "##" annotation):
    # do not emit an entry with an empty word.
    next if $line eq '';

    my ($word, $pron) = split(/\s*\t\s*/, $line, 2);
    $pron = '' unless defined $pron;    # word with no pronunciation field

    my @sphlist;
    foreach my $lph (split(/\s+/, $pron)) {
        die "error: unknown lph $lph in $word\n"
            if !defined $sph_for_lph{$lph};
        push(@sphlist, $sph_for_lph{$lph});
    }
    print {$out} "$word " . join("", @sphlist) . "\n";
}

close($in);
# Buffered write errors (e.g. a full disk) only surface when the handle is
# closed, so the result of close matters for the output file.
close($out) or die "error closing output dict $okfile: $!\n";
70
71