forked from liangclab/HERA
-
Notifications
You must be signed in to change notification settings - Fork 0
/
ParseLA.pl
executable file
·97 lines (75 loc) · 2.39 KB
/
ParseLA.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
use strict;
use warnings;
use Cwd;
# Load the read-number lookup tables from the reference name file.
# Every data line carries three whitespace-separated columns
# (read number, contig name, contig length), for example:
#   1 CTG1-1 78097
#   2 CTG1-2 52763
# Blank or whitespace-only lines and lines starting with '#' are ignored.
my $RefNameFile = 'ParseDAZZDB.txt';
my %ContigNameHash = ();      # read number -> contig name
my %ContigLengthHash = ();    # read number -> contig length

# Three-argument open with a lexical handle: the file name can never be
# interpreted as part of the open mode, and the handle is not a package
# global.  $! is reported so the user can see why the open failed.
open( my $RefFh, '<', $RefNameFile )
    || die "cannot open file $RefNameFile: $!\n";

while ( my $line = <$RefFh> )
{
    chomp($line);
    next if ( $line !~ m/\S/ );       # empty or whitespace-only line
    next if ( $line =~ m/^\s*#/ );    # comment line

    # split ' ' (awk mode) discards leading whitespace, so an indented
    # line does not yield an empty first field that would shift every
    # column by one.
    my ( $ReadNum, $ContigName, $ContigLength ) = split( ' ', $line );
    $ContigNameHash{$ReadNum} = $ContigName;
    $ContigLengthHash{$ReadNum} = $ContigLength;
} # end of while
close($RefFh);
# Parser state that persists across input lines: the fields of the record
# currently being assembled are filled in by successive lines and written
# out once the record is complete.
my ( $ReadNum1, $ReadNum2 ) = ( 0, 0 );        # read numbers of the pair
my $Strand = '';                               # orientation column value
my ( $Read1Start, $Read1End ) = ( 0, 0 );      # interval on the first read
my ( $Read2Start, $Read2End ) = ( 0, 0 );      # interval on the second read
my $Diff = 0;                                  # difference count

# Column header of the tab-separated report written to STDOUT.
print join(
    "\t",
    'Strand',
    qw( Ref RefStart RefEnd RefLength ),
    qw( Qry QryStart QryEnd QryLength ),
    'Diff'
), "\n";
# Convert the record stream on STDIN into one tab-separated row per
# alignment on STDOUT.  (The P/C/D line layout presumably comes from
# DALIGNER's LAdump -- confirm against the producing pipeline.)
#
# An alignment is spread over consecutive lines, e.g.
#   P 1 22314 c              read numbers of the pair and orientation
#   C 0 27038 70434 97489    start/end on read 1, start/end on read 2
#   D 62                     difference count; completes the record
#   P 3 22127 n
#   C 74110 76280 0 2189
#   D 134
# The row is printed when the D line arrives, using the values remembered
# from the preceding P and C lines.  Read numbers absent from the lookup
# tables give empty name/length columns (and an "uninitialized" warning).
while ( my $line = <STDIN> )
{
    chomp($line);

    # Summary lines carry no alignment data and are skipped, e.g.
    #   + P 2791
    #   % P 65
    #   + T 55594
    #   % T 1790
    #   @ T 1112
    next if ( $line =~ m/^[+@%]/ );

    # Split each line once instead of once per extracted field.
    my @Fields = split( /\s+/, $line );

    if ( $line =~ m/^P/ ) {
        # ReadNum1, ReadNum2, orientation flag
        ( $ReadNum1, $ReadNum2, $Strand ) = @Fields[ 1 .. 3 ];
        # 'n' maps to '+'; any other flag is reported as '-'.
        $Strand = ( $Strand eq 'n' ) ? '+' : '-';
    }
    elsif ( $line =~ m/^C/ ) {
        ( $Read1Start, $Read1End, $Read2Start, $Read2End ) = @Fields[ 1 .. 4 ];
    }
    elsif ( $line =~ m/^D/ ) {
        $Diff = $Fields[1];
        print join(
            "\t",
            $Strand,
            $ContigNameHash{$ReadNum1},
            $Read1Start,
            $Read1End,
            $ContigLengthHash{$ReadNum1},
            $ContigNameHash{$ReadNum2},
            $Read2Start,
            $Read2End,
            $ContigLengthHash{$ReadNum2},
            $Diff
        ), "\n";
    }
} # end of while