[go: up one dir, main page]

Menu

[r7]: / scripts / get_moses_scores.sh  Maximize  Restore  History

Download this file

29 lines (24 with data), 627 Bytes

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
#!/bin/bash
# Usage: get_moses_scores.sh <phrase-table.gz> <output.vocabext> <output.arpa>
#
# Reads a gzip-compressed phrase table whose columns are separated by "|||"
# and writes two files:
#
#   <output.vocabext>  One line per phrase-table entry, taken from fields
#                      1, 2 and 4-8 of the intermediate line described below
#                      (i.e. the first token of the last column is dropped).
#   <output.arpa>      An ARPA-style file with a single "\1-grams:" section:
#                      one line per entry holding log10 of the first token of
#                      the entry's last column, a tab, and the entry number,
#                      followed by fixed lines for </s>, <s> and <unk>.
#
# Intermediate line format (one per entry, space separated):
#   <entry number> <col1 with " "->"B">A<col2 with " "->"A"> <last column> <exp(k+1)>
# where k is the number of spaces in column 2.
# NOTE(review): exp(k+1) looks like a per-word penalty on the second column,
# and the field selection for <output.vocabext> presumably assumes the last
# column carries five scores -- confirm against the phrase-table format in use.

set -euo pipefail

if [ "$#" -lt 3 ]; then
  printf 'Usage: %s <phrase-table.gz> <output.vocabext> <output.arpa>\n' \
    "${0##*/}" >&2
  exit 2
fi

phrase_table=$1
vocabext_out=$2
arpa_out=$3

# Private temp file instead of a fixed /tmp name: safe for concurrent runs
# and removed on every exit path.
scores_tmp=$(mktemp) || { printf 'error: mktemp failed\n' >&2; exit 1; }
trap 'rm -f -- "$scores_tmp"' EXIT

# "gzip -dc" is what zcat runs on GNU systems, but also works where zcat
# insists on a ".Z" suffix. The "|||" separator is rewritten to "_#_" so it
# can be used as a plain awk field separator.
gzip -dc -- "$phrase_table" \
  | sed 's/|||/_#_/g' \
  | awk 'BEGIN { FS = " _#_ " }
    {
      gsub(" ", "B", $1)        # join the words of column 1 with "B"
      n = gsub(" ", "A", $2)    # join the words of column 2 with "A"; n = spaces replaced
      printf("%d %sA%s %s %f\n", NR, $1, $2, $NF, exp(n + 1))
    }' > "$scores_tmp"

# First field of the last intermediate line is its entry number, i.e. the
# total number of entries (empty when the phrase table has no lines).
entry_count=$(tail -n 1 "$scores_tmp" | awk '{ print $1 }')

awk '{ print $1, $2, $4, $5, $6, $7, $8 }' "$scores_tmp" > "$vocabext_out"

# The header count is entries + 3 to account for the </s>, <s> and <unk>
# lines appended in END.
awk -v t="$entry_count" 'BEGIN {
    print ""
    print "\\data\\"
    printf("ngram 1=%d\n", t + 3)
    print ""
    print "\\1-grams:"
  }
  {
    printf("%f\t%d\n", log($3) / log(10), NR)
  }
  END {
    printf("0.000000\t</s>\n")
    printf("-99\t<s>\n")
    printf("0.000000\t<unk>\n")
    print ""
    print "\\end\\"
  }' "$scores_tmp" > "$arpa_out"