一段小程序

一段小程序

一段小程序
刚刚开始接触 Perl,实验室的老板逼得很紧,急得抓耳挠腮,想请各位大侠帮帮小妹,看看这段程序的功能,大概说说是做什么的就好。
谢过啦~~


# Command-line arguments, in order:
#   1. discount  - constant subtracted from seen bigram counts when scoring;
#                  must be between 0 and 1
#   2. eng_train - training file read through ENG_TRAIN
#                  (presumably the English side of the corpus)
#   3. chi_train - training file read through CHI_TRAIN
#                  (presumably the Chinese side of the corpus)
#   4. test      - file of pairs to score
# They stay package variables because the rest of the script reads them
# by these names.
$discount  = shift;
$eng_train = shift;
$chi_train = shift;
$test      = shift;

die "Usage: $0 discount eng_train chi_train test\n"
    unless defined $discount && defined $eng_train && defined $chi_train;

# Enforce the documented range instead of silently producing odd scores.
die "discount must be a number between 0 and 1, got '$discount'\n"
    unless $discount =~ /\A[+-]?(?:\d+\.?\d*|\.\d+)(?:[eE][+-]?\d+)?\z/
        && $discount >= 0
        && $discount <= 1;

# Three-argument open keeps a file name from being read as a mode or a
# pipe, and every failure is reported instead of being ignored.
open(ENG_TRAIN, '<', $eng_train)
    or die "Cannot open '$eng_train' for reading: $!\n";
open(CHI_TRAIN, '<', $chi_train)
    or die "Cannot open '$chi_train' for reading: $!\n";

# Output files: count tables, the list of seen bigrams, and the scores.
open(BD,   '>', 'bd.txt')          or die "Cannot open 'bd.txt' for writing: $!\n";
open(BN,   '>', 'bn.txt')          or die "Cannot open 'bn.txt' for writing: $!\n";
open(BZ,   '>', 'bz.txt')          or die "Cannot open 'bz.txt' for writing: $!\n";
open(UN,   '>', 'un.txt')          or die "Cannot open 'un.txt' for writing: $!\n";
open(PAIR, '>', 'pair.txt')        or die "Cannot open 'pair.txt' for writing: $!\n";
open(PRO,  '>', 'probability.txt') or die "Cannot open 'probability.txt' for writing: $!\n";

# Collect counts from the two training files, read line by line in parallel.
# Each line is wrapped in <s> ... </s> and split on whitespace; the token at
# position i of the CHI_TRAIN line is joined with the token at position i of
# the ENG_TRAIN line into one unit "cn_en". Tables filled (package globals):
#   %BD  unit          -> times the unit starts a bigram   (bigram denominator)
#   %BN  "unit unit"   -> times the bigram was seen        (bigram numerator)
#   %BZ  unit          -> number of distinct bigrams the unit starts
#   %UN  unit          -> number of distinct bigrams the unit ends
#   $UD  total number of distinct bigrams                  (unigram denominator)
my $warned_short_eng = 0;

while (defined(my $cn_line = <CHI_TRAIN>)) {
    chomp $cn_line;
    my @cn_units = split ' ', "<s> $cn_line </s>";

    my $en_line = <ENG_TRAIN>;
    if (defined $en_line) {
        # Chomp the line that was just read, not $_.
        chomp $en_line;
    }
    else {
        # ENG_TRAIN ran out first: keep counting with an empty line, but
        # tell the user that the two files are not parallel.
        warn "ENG_TRAIN has fewer lines than CHI_TRAIN (first missing at line $.)\n"
            unless $warned_short_eng++;
        $en_line = '';
    }
    my @en_units = split ' ', "<s> $en_line </s>";

    # Build the "cn_en" unit for every position once. A missing English
    # token contributes an empty string.
    my @units = map {
        my $en = defined $en_units[$_] ? $en_units[$_] : '';
        $cn_units[$_] . '_' . $en;
    } 0 .. $#cn_units;

    for my $i (0 .. $#units - 1) {
        my ($prev, $next) = @units[$i, $i + 1];
        my $bigram = "$prev $next";

        # increment Bigram Denominator
        $BD{$prev}++;

        # First sighting of this bigram: update the distinct-type counts.
        if (!exists $BN{$bigram}) {
            $BZ{$prev}++;    # Bigram non-Zero count for the history
            $UD++;           # Unigram Denominator
            $UN{$next}++;    # Unigram Numerator
        }

        # increment Bigram Numerator (on every occurrence)
        $BN{$bigram}++;
    }
}
close CHI_TRAIN;
close ENG_TRAIN;

# Write every count table to its own output file, announcing the start and
# end of each table on STDOUT.

# Prints one "key:count" line per entry, in sorted key order, to $handle.
my $write_counts = sub {
    my ($handle, $counts) = @_;
    for my $key (sort keys %{$counts}) {
        print {$handle} "${key}:$counts->{$key}\n";
    }
};

print "BD BEGIN\n";
$write_counts->(\*BD, \%BD);
print "BD END\n";

print "BN BEGIN\n";
$write_counts->(\*BN, \%BN);
# pair.txt lists the bigram keys alone, without their counts.
for my $bigram (sort keys %BN) {
    print PAIR "$bigram\n";
}
print "BN END\n";

print "UN BEGIN\n";
# The first line of un.txt is the unigram denominator.
print UN "$UD\n";
$write_counts->(\*UN, \%UN);
print "UN END\n";

print "BZ BEGIN\n";
$write_counts->(\*BZ, \%BZ);
print "BZ END\n";

for my $handle (\*BD, \*BN, \*BZ, \*UN, \*PAIR) {
    close $handle;
}

# Score each line of the test file. A line holds two whitespace-separated
# units: a history unit followed by the unit to predict. For each line the
# negative natural log of the estimated probability is written to PRO as
# "history unit:score".
#
#   history seen in %BD:
#       (BN(history unit) - discount) / BD(history)     [0 if bigram unseen]
#       + BZ(history) * discount / BD(history) * unigram
#   history unseen:
#       unigram
# where unigram = UN(unit) / UD.
my $test_name = defined $test ? $test : '';
open(TEST, '<', $test_name)
    or die "Cannot open test file '$test_name' for reading: $!\n";

while (defined(my $line = <TEST>)) {
    chomp $line;
    my @pairs = split ' ', $line;

    # Blank or one-token lines cannot be scored; skipping them avoids
    # taking the log of zero further down.
    next if @pairs < 2;

    # Guard against an empty training set (UD of zero) and unseen units.
    my $unigram = ($UD && defined $UN{$pairs[1]})
                ? $UN{$pairs[1]} / $UD
                : 0;

    my $probability;
    if (defined $BD{$pairs[0]}) {
        my $bigram;
        if (defined $BN{"$pairs[0] $pairs[1]"}) {
            $bigram = ($BN{"$pairs[0] $pairs[1]"} - $discount) / $BD{$pairs[0]};
            print "bigram initial:$bigram\n";
        }
        else {
            print "go into else2\n";
            $bigram = 0;
        }

        # Add the back-off share: the mass removed by discounting,
        # distributed according to the unigram estimate.
        $probability = $bigram
                     + $BZ{$pairs[0]} * $discount / $BD{$pairs[0]} * $unigram;
    }
    else {
        print "go into else1\n";
        $probability = $unigram;
    }

    # log() dies on non-positive input; report such pairs as "inf"
    # instead of aborting the whole run.
    my $score = $probability > 0 ? -log($probability) : 'inf';
    print PRO "$pairs[0] $pairs[1]:", $score, "\n";
}

close TEST;
# Buffered write errors only surface when the handle is closed.
close PRO or die "Cannot close 'probability.txt': $!\n";
[CCB]11[/CCB]

没空看。。。


大概看了一下,似乎是计算个数
看得很郁闷。