[go: up one dir, main page]

Menu

[78dfa4]: / longread.awk  Maximize  Restore  History

Download this file

121 lines (108 with data), 2.8 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
#awk -f longread.awk aln.paf
#aln.paf: minimap2
#process long read alignment data for Lep-Anchor
BEGIN{
#mapping quality limit
if (qLimit == "")
qLimit=0
FS="\t"
rev["+"] = "-"
rev["-"] = "+"
}
(NF>=12 && $12>=qLimit){
for (i = 13; i<=NF;++i)
if ($i == "tp:A:S")
next #do not process secondary alignments
if ($1 == prevR) {
++n
} else {
processAllPairs()
n = 1
}
#print within contig part
print $6"\t"($8 + 1)"\t"$6"\t"$9"\t++\t"$12
print $6"\t"$9"\t"$6"\t"($8 + 1)"\t--\t"$12
o[n]=$5 #orientation
c[n]=$6 #contig
start[n]=$8+1 #start
end[n]=$9 #end
rstart[n]=$3+1 #read start
rend[n]=$4 #read end
q[n]=$12 #quality
prevR = $1
}
function min(a,b){
if (a <= b)
return a
else
return b
}
#find the shortest pair in read distance between each aligned region and contig
function processAllPairs( i,j,k,pos,count,rindex)
{
if (n <= 1)
return
if (n == 2) {
i = 1
j = 2
processOnePair(c[i],start[i],end[i],rstart[i],rend[i],o[i],c[j],start[j],end[j],rstart[j],rend[j],o[j],min(q[i], q[j]))
return
}
#sort reads based on rstart
for (i = 1; i <= n; ++i) {
pos[i] = rstart[i]
rindex[rstart[i], ++count[rstart[i]]] = i
}
asort(pos)
#process only adjacent reads (maybe could skip some?)
for (k = 1; k < n; ++k) {
i = rindex[pos[k], count[pos[k]]--]
j = rindex[pos[k + 1], count[pos[k + 1]]]
processOnePair(c[i],start[i],end[i],rstart[i],rend[i],o[i],c[j],start[j],end[j],rstart[j],rend[j],o[j],min(q[i], q[j]))
#printf pos[k]"->"pos[k+1] " " i " " j "\t"
}
#print ""
# for (i = 1; i <= n; ++i)
# for (j = i + 1; j <= n; ++j)
# processOnePair(c[j],start[j],end[j],rstart[j],rend[j],o[j],c[i],start[i],end[i],rstart[i],rend[i],o[i],min(q[i], q[j]))
}
function processOnePair(c1, start1, end1, rstart1, rend1, o1, c2, start2, end2, rstart2, rend2, o2, quality ,lp1,lp2,overlap,overlap1,overlap2)
{
# if (c1 == c2 && o1 != o2)
# return
if (rstart1 > rstart2) {
processOnePair(c2, start2, end2, rstart2, rend2, o2, c1, start1, end1, rstart1, rend1, o1, quality)
return;
}
#now rstart1 <= rstart2
# print "process"
overlap = 0
if (rend1 >= rstart2)
overlap = rend1 - rstart2 + 1
#overlap is too large
if (overlap > end1 - start1 + 1 && overlap > end2 - start2 + 1)
return
#remove thw overlap from one of the mappings with longer overhang...
if (end1 - start1 >= end2 - start2) {
overlap1 = overlap
overlap2 = 0
} else {
overlap1 = 0
overlap2 = overlap
}
if (o1 == "+")
lp1 = end1 - overlap1
else
lp1 = start1 + overlap1
if (o2 == "+")
lp2 = start2 + overlap2
else
lp2 = end2 - overlap2
#print both ways
print c1"\t"lp1"\t"c2"\t"lp2"\t"o1 o2"\t"quality
print c2"\t"lp2"\t"c1"\t"lp1"\t"rev[o2] rev[o1]"\t"quality
# print "end"
}
END{
processAllPairs()
}