# pht_to_long.pl revision 8fc5a7f51e62cb4ae44a27bdf4176d04adc80ede
use strict;
use warnings;
use Getopt::Long;

# Rewrite a pronunciation dictionary, replacing every "sph" symbol in each
# pronunciation with the "lph" symbol it maps to in a .pht phoneme table
# (sph/lph are presumably "short" and "long" phoneme names -- confirm).
#
# Options:
#   -pht FILE            phoneme table; when omitted, the table is located
#                        from the dictionary's "LANG = xx-yy" header line as
#                        $ESRSDK/config/<xx.yy>/models/generic.pht
#   -ok FILE | -i FILE   input dictionary (required)
#   -otxt FILE | -o FILE output dictionary (required)
#   -showerrs            also write entries containing unknown sph symbols
#                        (the unknown symbol is written as "(sym)")

# Count of table rows seen per lph symbol.  Nothing in this script reads it;
# it is kept only for parity with the original bookkeeping.
my %lphhash;

# Maps each sph symbol to its lph symbol.
my %lph_for_sph;

my ($phtfile, $okfile, $otxt, $showerrs);
my $usage =
    "usage: $0 [-pht phtfile] -ok|-i okfile -otxt|-o outfile [-showerrs]\n";

GetOptions("pht=s"    => \$phtfile,
           "ok=s"     => \$okfile,
           "i=s"      => \$okfile,
           "otxt=s"   => \$otxt,
           "o=s"      => \$otxt,
           "showerrs" => \$showerrs)
    or die $usage;

# Both file names are mandatory; fail with a usage message instead of an
# obscure "error opening okfile" on an empty name.
die $usage unless defined $okfile && defined $otxt;

if (defined $phtfile) {
    load_phtfile($phtfile);
}

# load_phtfile(FILE)
#
# Read the phoneme table FILE and fill %lph_for_sph (and %lphhash).
# The first line of FILE is treated as a header and discarded.  Every other
# line is split on whitespace; field 2 is taken as the lph symbol and
# field 3 as the sph symbol, the remaining fields are ignored.
# "&" is always mapped to itself.
# Dies if FILE cannot be opened.
sub load_phtfile
{
    my ($table_path) = @_;

    $lphhash{"&"}++;
    $lph_for_sph{"&"} = "&";

    # Three-argument open: the file name can never be taken as a mode.
    open(my $pht_fh, '<', $table_path)
        or die "error opening phtfile $table_path: $!\n";
    print STDERR "using phtfile $table_path\n";

    my $header_line = <$pht_fh>;    # header, intentionally discarded

    while (my $row = <$pht_fh>) {
        $row =~ s/\s+$//;
        my (undef, $lph, $sph) = split(/\s+/, $row);
        # Blank or short rows carry no mapping; skip them rather than
        # storing an undef key or value.
        next unless defined $lph && defined $sph;
        $lph_for_sph{$sph} = $lph;
        $lphhash{$lph}++;
    }
    close($pht_fh);
    return;
}

open(my $in_fh, '<', $okfile)
    or die "error opening okfile $okfile: $!\n";
open(my $out_fh, '>', $otxt)
    or die "error opening output dict $otxt: $!\n";

while (my $entry = <$in_fh>) {
    $entry =~ s/\s+$//;

    # A blank line has neither word nor pronunciation; do not emit an
    # empty entry for it.
    next if $entry eq '';

    if ($entry =~ /^LANG\s*=\s*(\S+)/) {    # LANG = EN-US
        my $language = lc($1);
        $language =~ s/\-/\./g;             # en-us -> en.us
        if (!defined $phtfile) {
            # No -pht given: derive the table location from the language.
            die "Error: ESRSDK is not defined\n" if (!defined $ENV{ESRSDK});
            $phtfile = "$ENV{ESRSDK}/config/$language/models/generic.pht";
            load_phtfile($phtfile);
        }
        # The header line is copied to the output unchanged.
        print {$out_fh} "$entry\n";
        next;
    }

    my ($word, $pron) = split(/\s+/, $entry);
    $pron = '' unless defined $pron;        # word with no pronunciation

    # Each character of the pronunciation is one sph symbol.
    my @sphlist = split(/ */, $pron);
    my @lphlist = ();
    my $nerrs   = 0;
    foreach my $sph (@sphlist) {
        my $lph = $lph_for_sph{$sph};
        if (!defined $lph) {
            warn "error: unknown sph $sph in $word $pron\n";
            $lph = "($sph)";                # mark the unknown symbol
            $nerrs++;
        }
        push(@lphlist, $lph);
    }

    # Entries with unknown symbols are dropped unless -showerrs was given.
    next if ($nerrs && !$showerrs);
    print {$out_fh} "$word \t @lphlist\n";
}

close($in_fh);
# Buffered write errors only surface at close time, so check it.
close($out_fh) or die "error closing output dict $otxt: $!\n";