[go: up one dir, main page]

File: maf2phy.awk

package info (click to toggle)
andi 0.12-4
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 1,016 kB
  • sloc: ansic: 2,107; cpp: 382; sh: 196; makefile: 98; awk: 51
file content (59 lines) | stat: -rw-r--r-- 1,257 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
# maf2phy.awk
# Author: Bernhard Haubold, haubold@evolbio.mpg.de
# Contributors: Fabian Klötzl, kloetzl@evolbio.mpg.de
# Date: June 19, 2014
# Last Modified: February 5, 2015
# Usage: awk -f maf2phy.awk -v n=<numberOfTaxa> file.maf > file.phy
#
# Input is scanned line by line. A line starting with "a" opens a new
# alignment block; the block is kept only if that line carries mult=<n>.
# For every line starting with "s" inside a kept block, field 7 (the
# alignment text of a MAF "s" line) is appended to the sequence stored
# under field 2 (the source name). Names are remembered in order of first
# appearance. At the end the concatenated sequences are checked for equal
# length and printed as an interleaved PHYLIP alignment, 60 columns per
# line, with names padded/truncated to 10 characters in the first block.
BEGIN{
  if(!n){
    print "maf2phy.awk: Convert mutation annotation format (maf) as generated by the program mugsy to PHYLIP";
    print "Usage: awk -f maf2phy.awk -v n=<numberOfTaxa> file.maf > file.phy";
    # exit inside BEGIN still executes the END rule, so leave a flag that
    # END checks before it prints anything.
    usageOnly = 1;
    exit(-1);
  }
  numNames = 0;
  # Anchor the pattern on both sides: "mult=" must start a field and must
  # not be followed by another digit, so n=1 does not also select mult=10.
  test = "(^|[ \t])mult=" n "([^0-9]|$)";
}
{
  if(/^a/){
    if($0 ~ test)
      open = 1;
    else
      open = 0;
  }
  if(open && /^s/){
    # Membership test instead of !s[$2]: an empty stored sequence must not
    # make the name look new and get it registered a second time.
    if(!($2 in s))
      names[numNames++] = $2;
    s[$2] = s[$2] $7;
  }
}
END{
  # Usage was printed in BEGIN; repeat the exit status explicitly and
  # produce no alignment output.
  if(usageOnly)
    exit(-1);

  # check equal length of sequences; the first sequence sets the reference
  len = 0;
  for(i=0;i<numNames;i++){
    name = names[i];
    if(i == 0)
      len = length(s[name]);
    else if(length(s[name]) != len){
      print "sequence length should be " len " but is in fact " length(s[name]);
      exit(-1);
    }
  }

  # PHYLIP header: number of taxa and number of alignment columns
  print numNames, len;

  # Interleaved body, l columns per line. The first block is always
  # written and is the only one that carries the names. The loop condition
  # is start <= len (not <) so that a final block consisting of a single
  # column is still printed.
  l = 60;
  start = 1;
  do{
    for(i=0;i<numNames;i++){
      name = names[i];
      if(start == 1)
        printf("%-10.10s ",name);
      print(substr(s[name],start,l));
    }
    printf("\n");
    start += l;
  }while(start <= len);
}