-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcsfasta2fasta.pl
executable file
·101 lines (73 loc) · 1.91 KB
/
csfasta2fasta.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
#!/usr/bin/env perl
use strict;
# Usage: csfasta2fasta.pl <csfasta_file> [shift]
#
# Reads colour-space records with read_csfasta(), decodes every record to
# base space using the transition table from colour_space(), and prints the
# result to STDOUT in FASTA format (">header" line followed by one sequence
# line).  When the optional second argument is true (e.g. 1), the leading
# base of each decoded sequence is omitted from the output.
die "program csfasta shift\nif shift is 1, the first base is omitted in the output\n" if (@ARGV < 1);
my $csfasta_file = $ARGV[0];
my %colourspace = colour_space();
my %sequences = read_csfasta( $csfasta_file );

# Honour the shift flag whenever a second argument is present, even if
# further (ignored) arguments follow it.
my $shift = @ARGV >= 2 ? $ARGV[1] : 0;

# Hash order is unspecified, so sort the headers to make the output
# reproducible from run to run.
foreach my $key ( sort keys %sequences ){
    print ">$key\n";
    my $seq = $sequences{ $key };
    $seq = "" unless defined $seq;      # header without any sequence data
    my @letters = split( //, $seq );

    # The first character is the known starting base; every following
    # character is a colour encoding the transition from the previously
    # decoded base to the next one.
    my $previous_base = $letters[0];
    for my $i ( 1 .. $#letters ){
        my $base = $colourspace{ $previous_base . $letters[$i] };
        # A character that is not in the table (or an unknown previous base)
        # cannot be decoded.  Emit 'N' instead of an empty string; because no
        # table key starts with 'N', the rest of the read becomes 'N' too.
        $base = 'N' unless defined $base;
        $letters[$i] = $base;
        $previous_base = $base;
    }

    shift( @letters ) if( $shift );
    # join() avoids overwriting the global list separator $".
    print join( "", @letters ), "\n";
}
# colour_space()
#
# Builds the colour-space decoding table.  Each key is a two-character
# string "<previous base><colour digit>" and the value is the base that
# follows the previous base under that colour.
#
# Returns: a hash (as a list) holding the 16 base/colour combinations.
sub colour_space{
    # For every colour digit, the dinucleotides (previous base, next base)
    # that the digit stands for.
    my %dinucleotides_for = (
        0 => [ qw( AA CC GG TT ) ],
        1 => [ qw( AC CA GT TG ) ],
        2 => [ qw( AG GA CT TC ) ],
        3 => [ qw( AT TA CG GC ) ],
    );

    my %next_base_for;
    foreach my $colour ( keys %dinucleotides_for ){
        foreach my $pair ( @{ $dinucleotides_for{ $colour } } ){
            my ( $from, $to ) = split( //, $pair );
            $next_base_for{ $from . $colour } = $to;
        }
    }
    return %next_base_for;
}
# read_csfasta( $csfasta_file )
#
# Reads a csfasta file into memory.
#
# Parameters:
#   $csfasta_file - path of the file to read.
#
# Returns: a hash (as a list) mapping each header (the text after the
# leading '>') to the concatenation of all sequence lines that follow it.
# A header with no sequence lines maps to the empty string.  If a header
# occurs more than once, the last occurrence wins.
#
# Dies if the file cannot be opened.
#
# Blank lines and lines starting with '#' are skipped, and any data that
# appears before the first header is ignored, so no record is ever stored
# under an empty/undefined header.
sub read_csfasta{
    my ( $csfasta_file ) = @_;
    my %sequences;
    my $header;     # stays undef until the first '>' line has been seen

    # Three-argument open with a lexical handle: the file name can never be
    # interpreted as an open mode, and a failure is reported instead of
    # silently yielding an empty result.
    open( my $csfasta, '<', $csfasta_file )
        or die "Cannot open csfasta file '$csfasta_file': $!\n";

    while( my $line = <$csfasta> ){
        $line =~ s/[\r\n]+\z//;         # strip LF as well as CRLF line endings
        next if $line =~ /^\s*$/;       # blank line
        next if $line =~ /^#/;          # comment line

        if( $line =~ /^>(.*)/ ){        # a new record starts here
            $header = $1;
            $sequences{ $header } = "";
        }
        elsif( defined $header ){       # sequence data of the current record
            $sequences{ $header } .= $line;
        }
    }
    close( $csfasta );

    return %sequences;
}