[go: up one dir, main page]

Menu

[f4ef20]: / BLS / maf2fasta.pl  Maximize  Restore  History

Download this file

132 lines (110 with data), 2.3 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
#!/usr/bin/perl -w
use strict;
use warnings;
use File::Basename;
use Getopt::Long;
use Carp;
use Sequence;
my $prog = basename($0);
my $noGap = 0;
my $noMap = 0;
my $maskRepeats = 0;
my $verbose = 0;
my $segment = 0;
GetOptions ('m|mask_repeat'=>\$maskRepeats,
'G|no-gap'=>\$noGap,
'nomap'=>\$noMap,
's|segment-sequence:i'=>\$segment,
'v|verbose'=>\$verbose
);
if (@ARGV != 2)
{
print "Convert MAF files to a fasta file\n";
print "Usage: $prog [options] <in.maf> <out.fa>\n";
print "OPTIONS:\n";
print " -m : mask repeats <on|[off]>\n";
print " -G : remove gap <on|[off]>\n";
print " -nomap : do not write gap map\n";
print " -s [int]: segment sequences ($segment) (0=no segment)\n";
print " -v : verbose <on|[off]>\n";
print "\n";
exit (1);
}
my ($inMafFile, $outFastaFile) = @ARGV;
my $fin;
open ($fin, "<$inMafFile") || Carp::croak "can not open file $inMafFile to read\n";
my $fout;
open ($fout, ">$outFastaFile") || Carp::croak "can not open file $outFastaFile to write\n";
my $blockName = "";
my $seqPrefix = "";
my $blockIter = -1;
while (my $line = <$fin>)
{
chomp $line;
if ($line =~/^\#BLOCK_NAME=(.*?)$/)
{
$blockName = $1;
}
elsif ($line =~/^a/)
{
$blockIter++;
if ($blockName ne "")
{
$seqPrefix = $blockName;
}
else
{
$seqPrefix = "b$blockIter";
}
}
elsif ($line =~/^\s*$/)
{
$blockName = "";
print $fout "\n";
}
elsif ($line =~/^s/)
{
my @cols = split (/\s+/, $line);
my $db = $cols[1];
if ($db=~/^(\w+)\./)
{
$db = $1;
}
my $seq = $cols[6];
my $header = "$seqPrefix.$db";
if ($noGap)
{
my $gapLen = 0;
my @gapMap;
if ($seq!~/^\-/)
{
push @gapMap, "0->0";
}
while ($seq=~/(\-+)/g)
{
$gapLen += length($1);
#print "$seqPrefix.$db=$gapLen\n";
my $gapEnd = pos($seq);
my $gaplessEnd = $gapEnd - $gapLen;
push @gapMap, "$gaplessEnd->$gapEnd";
}
$header .= "\tMAP=" . join (",", @gapMap) unless $noMap;
$seq =~s/-//g;
}
if ($maskRepeats)
{
$seq =~tr/acgt/nnnn/;
}
print $fout ">$header\n";
if ($segment > 0)
{
print $fout segmentStr ($seq, int($segment)), "\n";
}
else
{
print $fout $seq, "\n";
}
}
}
close ($fin);
close ($fout);