1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59
|
# maf2phy.awk
# Author: Bernhard Haubold, haubold@evolbio.mpg.de
# Contributors: Fabian Klötzl, kloetzl@evolbio.mpg.de
# Date: June 19, 2014
# Last Modified: February 5, 2015
# Convert the alignment blocks of a MAF file that contain exactly n sequences
# ("mult=n" on the block's "a" line, as written by mugsy) into a single
# interleaved PHYLIP alignment on standard output.
#
# Input variable (set with -v):
#   n  number of taxa an alignment block must contain to be used
#
# Diagnostics go to standard error so they never end up inside the PHYLIP
# file; the exit status is 1 on any error and 0 otherwise.
BEGIN {
    if (!n) {
        print "maf2phy.awk: Convert mutation annotation format (maf) as generated by the program mugsy to PHYLIP" > "/dev/stderr";
        print "Usage: awk -f maf2phy.awk -v n=<numberOfTaxa> file.maf > file.phy" > "/dev/stderr";
        # "exit" inside BEGIN still runs the END rule, so leave a flag that
        # tells END to stop before it prints anything.
        failed = 1;
        exit 1;
    }
    numNames = 0;         # number of distinct sequence names seen so far
    wanted = "mult=" n;   # token an "a" line must carry for its block to be used
    blockWidth = 60;      # alignment columns per interleaved output block
}

{
    # An "a" line opens a new alignment block. The block is used only if one
    # of its fields is exactly "mult=<n>"; an exact field comparison keeps
    # n=1 from also selecting mult=10, mult=11, ...
    if ($1 == "a") {
        inBlock = 0;
        for (f = 2; f <= NF; f++) {
            if ($f == wanted) {
                inBlock = 1;
                break;
            }
        }
    }

    # "s" lines inside a selected block: field 2 is the sequence name and
    # field 7 the aligned text, which is appended to what was collected for
    # that name in earlier blocks.
    if (inBlock && $1 == "s") {
        # Membership test rather than truthiness of the stored text, so a
        # name is registered exactly once, in order of first appearance.
        if (!($2 in s))
            names[numNames++] = $2;
        s[$2] = s[$2] $7;
    }
}

END {
    # Usage error already reported in BEGIN.
    if (failed)
        exit 1;

    if (numNames == 0) {
        print("maf2phy.awk: no alignment block with " wanted " found") > "/dev/stderr";
        exit 1;
    }

    # check equal length of sequences
    len = length(s[names[0]]);
    for (i = 1; i < numNames; i++) {
        name = names[i];
        if (length(s[name]) != len) {
            print("sequence length should be " len " but is in fact " length(s[name])) > "/dev/stderr";
            exit 1;
        }
    }

    # PHYLIP header: number of sequences and number of alignment columns.
    print numNames, len;

    # First interleaved block: each row starts with the name, padded or
    # truncated to 10 characters, followed by the first chunk of sequence.
    start = 1;
    for (i = 0; i < numNames; i++) {
        name = names[i];
        printf("%-10.10s", name);
        print(" " substr(s[name], start, blockWidth));
    }
    printf("\n");
    start += blockWidth;

    # Remaining blocks carry sequence only. substr() positions are 1-based,
    # so column "len" still has to be printed when start == len.
    while (start <= len) {
        for (i = 0; i < numNames; i++) {
            name = names[i];
            print(substr(s[name], start, blockWidth));
        }
        printf("\n");
        start += blockWidth;
    }
}
|