# pht_to_long.pl revision 8fc5a7f51e62cb4ae44a27bdf4176d04adc80ede
use Getopt::Long;

# Command-line options (all values land in package variables that the rest
# of the script reads):
#   -pht <file>             phoneme table; when omitted, the main loop builds
#                           a default path from $ENV{ESRSDK} and the LANG line
#   -ok <file> | -i <file>  input dictionary (both spellings set $okfile)
#   -otxt <file> | -o <file> output dictionary (both spellings set $otxt)
#   -showerrs               also write entries that contain unknown phonemes
$rc = GetOptions("pht=s"    => \$phtfile,
                 "ok=s"     => \$okfile,
                 "i=s"      => \$okfile,
                 "otxt=s"   => \$otxt,
                 "o=s"      => \$otxt,
                 "showerrs" => \$showerrs);

# GetOptions returns false after reporting a bad option on STDERR; stop here
# rather than continue with a partially parsed command line.
die "usage: $0 [-pht <phtfile>] -ok|-i <okfile> -otxt|-o <outfile> [-showerrs]\n"
    unless $rc;

# An explicitly requested phoneme table is loaded before any input is read.
if (defined $phtfile) {
    load_phtfile($phtfile);
}

# load_phtfile($phtfile)
#
# Reads the phoneme table $phtfile and fills two package-level hashes:
#   %lph_for_sph - keyed by the third whitespace-separated column of each
#                  line, holding the second column of that line
#   %lphhash     - counts how often each second-column value was seen
# "&" is always registered and maps to itself.  The first line of the file
# is skipped as a header.  Lines with fewer than three columns (including
# blank lines) are ignored instead of creating an empty-string key.
#
# Dies if the file cannot be opened.  Uses a lexical handle and a lexical
# line variable, so the caller's $_ is left untouched.
sub load_phtfile
{
    my $phtfile = shift(@_);
    $lphhash{"&"}++;
    $lph_for_sph{"&"} = "&";
    # Three-argument open: the filename can never be read as a mode or pipe.
    open(my $pht, '<', $phtfile)
        or die "error opening phtfile $phtfile: $!\n";
    print STDERR "using phtfile $phtfile\n";
    scalar(<$pht>);  # discard the header line
    while (my $line = <$pht>) {
        $line =~ s/\s+$//;
        # The first column and anything after the third are not used.
        my (undef, $lph, $sph) = split(/\s+/, $line);
        next unless defined $sph;
        $lph_for_sph{$sph} = $lph;
        $lphhash{$lph}++;
    }
    close($pht);
}

# Main conversion pass: read the input dictionary line by line and write the
# output dictionary with every pronunciation rewritten through %lph_for_sph.
#
# A three-argument open with an undefined filename would not fail, so the
# required options are checked explicitly first.
die "error: no input okfile given (use -ok or -i)\n" unless defined $okfile;
die "error: no output dict given (use -otxt or -o)\n" unless defined $otxt;

open(my $in, '<', $okfile) or die "error opening okfile $okfile: $!\n";
open(my $out, '>', $otxt)  or die "error opening output dict $otxt: $!\n";
while (my $line = <$in>) {
    $line =~ s/\s+$//;
    if ($line =~ /^LANG\s*=\s*(\S+)/) { # LANG = EN-US
        # The language tag is lower-cased and "-" becomes "." to form the
        # directory name used in the default phoneme-table path.
        my $language = lc($1);
        $language =~ s/\-/\./g;
        if (!defined $phtfile) {
            die "Error: ESRSDK is not defined\n" if (!defined $ENV{ESRSDK});
            $phtfile = "$ENV{ESRSDK}/config/$language/models/generic.pht";
            load_phtfile($phtfile);
        }
        # The LANG line is copied through unchanged (minus trailing space).
        print {$out} "$line\n";
        next;
    }
    my ($word, $pron) = split(/\s+/, $line);
    next unless defined $word;  # blank line: nothing to convert
    # The pattern / */ can match the empty string, so the pronunciation is
    # split into single characters; each one is looked up on its own.
    my @sphlist = defined $pron ? split(/ */, $pron) : ();
    my @lphlist = ();
    my $nerrs = 0;
    foreach my $sph (@sphlist) {
        my $lph = $lph_for_sph{$sph};
        if (!defined $lph) {
            warn "error: unknown sph $sph in $word $pron\n";
            # Unknown symbols are kept, parenthesised, so they stay visible
            # when -showerrs asks for the entry to be written anyway.
            $lph = "($sph)";
            $nerrs++;
        }
        push(@lphlist, $lph);
    }
    # Entries with unknown symbols are dropped unless -showerrs was given.
    next if ($nerrs && !$showerrs);
    print {$out} "$word \t @lphlist\n";
}
close($in);
# Buffered write errors only surface at close, so check it.
close($out) or die "error closing output dict $otxt: $!\n";
