一段小程序
一段小程序
刚刚开始接触 Perl,实验室的老板逼得很紧,急得我抓耳挠腮,想请各位大侠帮帮小妹,看看这段程序的功能,大概说说它是做什么的就好。
谢过啦~~
# Command-line arguments, in order:
#   1. discount  - discount constant subtracted from seen bigram counts;
#                  must be between 0 and 1
#   2. eng_train - training file read in lockstep with chi_train
#   3. chi_train - training file that drives the training loop
#   4. test      - file of "context word" unit pairs to score
# These stay package globals (and the filehandles stay barewords) because
# the rest of the script refers to them by these exact names.
die "Usage: $0 DISCOUNT ENG_TRAIN CHI_TRAIN TEST\n" if @ARGV < 4;

$discount  = shift;    # must be between 0 and 1
$eng_train = shift;
$chi_train = shift;
$test      = shift;

# Reject a non-numeric or out-of-range discount up front instead of
# silently producing meaningless probabilities later.
die "discount must be a number between 0 and 1, got '$discount'\n"
    unless $discount =~ /\A\+?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?\z/
        && $discount >= 0
        && $discount <= 1;

# Three-argument open with an explicit mode, and every open is checked:
# an unreadable training file would otherwise just yield empty counts.
open ENG_TRAIN, '<', $eng_train
    or die "Cannot open '$eng_train' for reading: $!\n";
open CHI_TRAIN, '<', $chi_train
    or die "Cannot open '$chi_train' for reading: $!\n";

# Output files, all created in the current working directory.
open BD,   '>', 'bd.txt'          or die "Cannot open 'bd.txt' for writing: $!\n";
open BN,   '>', 'bn.txt'          or die "Cannot open 'bn.txt' for writing: $!\n";
open BZ,   '>', 'bz.txt'          or die "Cannot open 'bz.txt' for writing: $!\n";
open UN,   '>', 'un.txt'          or die "Cannot open 'un.txt' for writing: $!\n";
open PAIR, '>', 'pair.txt'        or die "Cannot open 'pair.txt' for writing: $!\n";
open PRO,  '>', 'probability.txt' or die "Cannot open 'probability.txt' for writing: $!\n";
# Collect the counts used for scoring from the two training files.
#
# Line i of CHI_TRAIN is paired with line i of ENG_TRAIN.  Both lines are
# wrapped in <s> ... </s>, split on whitespace, and token j of one line is
# glued to token j of the other as "cn_en" to form one unit.  For every
# adjacent pair of units (context, word) the loop updates:
#   %BD{context}        - how many times the context was followed by anything
#   %BN{"context word"} - how many times this exact bigram occurred
#   %BZ{context}        - how many distinct words followed the context
#   %UN{word}           - how many distinct contexts preceded the word
#   $UD                 - total number of distinct bigrams
# NOTE(review): units are paired purely by position, so the two lines are
# presumably expected to have the same number of tokens -- confirm against
# the training data.  A shorter ENG_TRAIN line yields units with an empty
# second half.
while (my $cn_line = <CHI_TRAIN>) {
    my $line_no = $.;    # capture before the next read changes $.
    chomp $cn_line;
    my @cn_units = split ' ', "<s> $cn_line </s>";

    my $en_line = <ENG_TRAIN>;
    if (defined $en_line) {
        # Chomp the line just read (not $_ a second time).
        chomp $en_line;
    }
    else {
        # ENG_TRAIN ran out first: keep going with an empty line, but say so.
        warn "ENG_TRAIN has no line $line_no to pair with CHI_TRAIN; "
           . "treating it as empty\n";
        $en_line = '';
    }
    my @en_units = split ' ', "<s> $en_line </s>";

    for my $i (0 .. $#cn_units - 1) {
        my $context = $cn_units[$i]     . '_' . $en_units[$i];
        my $word    = $cn_units[$i + 1] . '_' . $en_units[$i + 1];
        my $bigram  = "$context $word";

        $BD{$context}++;    # bigram denominator

        # Type-level counts only move the first time a bigram is seen.
        if (!exists $BN{$bigram}) {
            $BZ{$context}++;    # non-zero bigram count for this context
            $UD++;              # unigram denominator
            $UN{$word}++;       # unigram numerator
        }

        $BN{$bigram}++;     # bigram numerator
    }
}
# Training input is fully consumed; release both input handles.
close CHI_TRAIN;
close ENG_TRAIN;

# Dump every count table to its own file as sorted "key:count" lines.
# The BEGIN/END markers go to STDOUT as progress messages only.
print "BD BEGIN\n";
for my $key (sort keys %BD) {
    print BD "$key:$BD{$key}\n";
}
print "BD END\n";

print "BN BEGIN\n";
for my $key (sort keys %BN) {
    print BN "$key:$BN{$key}\n";
}
# pair.txt lists the distinct bigrams alone, without their counts.
for my $key (sort keys %BN) {
    print PAIR "$key\n";
}
print "BN END\n";

print "UN BEGIN\n";
# First line of un.txt is the shared unigram denominator.
print UN "$UD\n";
for my $key (sort keys %UN) {
    print UN "$key:$UN{$key}\n";
}
print "UN END\n";

print "BZ BEGIN\n";
for my $key (sort keys %BZ) {
    print BZ "$key:$BZ{$key}\n";
}
print "BZ END\n";

# Buffered write errors (e.g. a full disk) only surface at close time,
# so the result of each close is checked.
close BD   or die "Error closing bd.txt: $!\n";
close BN   or die "Error closing bn.txt: $!\n";
close BZ   or die "Error closing bz.txt: $!\n";
close UN   or die "Error closing un.txt: $!\n";
close PAIR or die "Error closing pair.txt: $!\n";
# Score every line of the test file and write "context word:cost" lines
# to PRO, where cost is the negative natural log of the probability.
#
# Each test line holds two whitespace-separated units: a context and the
# word that follows it.  The probability is
#     (BN{context word} - discount) / BD{context}
#   + BZ{context} * discount / BD{context} * unigram
# with unigram = UN{word} / UD.  When the context was never seen in
# training, the unigram value alone is used.
# NOTE(review): this has the shape of interpolated absolute discounting
# with a continuation-count unigram (Kneser-Ney style) -- confirm intent.
open TEST, '<', $test or die "Cannot open '$test' for reading: $!\n";

while (my $line = <TEST>) {
    chomp $line;
    my @pairs = split ' ', $line;

    # A blank or one-token line cannot be scored.
    if (@pairs < 2) {
        warn "Skipping line $. of '$test': expected two units\n";
        next;
    }
    my ($context, $word) = @pairs;    # any further fields are ignored

    # Guard both the empty-model case ($UD is 0 or undef) and unseen words.
    my $unigram = $UD ? ($UN{$word} || 0) / $UD : 0;

    my $probability;
    if (defined $BD{$context}) {
        my $bigram;
        if (defined $BN{"$context $word"}) {
            $bigram = ($BN{"$context $word"} - $discount) / $BD{$context};
            print "bigram initial:$bigram\n";
        }
        else {
            print "go into else2\n";
            $bigram = 0;
        }

        # Hand the discounted mass back, spread according to the unigram.
        $probability
            = $bigram + $BZ{$context} * $discount / $BD{$context} * $unigram;
    }
    else {
        print "go into else1\n";
        $probability = $unigram;
    }

    # log(0) is a fatal error in Perl; a zero probability is reported as an
    # infinite cost instead of aborting the whole run.
    my $cost = $probability > 0 ? -log($probability) : 'Inf';
    print PRO "$context $word:", $cost, "\n";
}

close TEST;
# Check the close so buffered write errors on probability.txt are noticed.
close PRO or die "Error closing probability.txt: $!\n";