-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsimon_scaff_editor.pl
executable file
·79 lines (62 loc) · 2.08 KB
/
simon_scaff_editor.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
#!/usr/bin/env perl
#
#
use strict;
use Bio::SeqIO;
use Bio::Seq;
sub by_num { $b <=> $a }
my $scaffile = $ARGV[0];
my $reportfile = $ARGV[1];
my %scaffs = ();
my %reports = ();
open IN, $reportfile;
my $count = 0;
while(<IN>){
if(/\w+\s+\d+/){
my @tmp = split /\s+/, $_;
$reports{$tmp[0]}->{$tmp[1]}->{'Type'} = $tmp[2];
$reports{$tmp[0]}->{$tmp[1]}->{'Old'} = $tmp[3];
$reports{$tmp[0]}->{$tmp[1]}->{'New'} = $tmp[4];
$count ++;
}
}
print STDOUT "Loaded $count edit requests from $reportfile\n";
my $scio = Bio::SeqIO->new(-file => $scaffile, -format => 'Fasta');
while(my $seq = $scio->next_seq()){
$scaffs{$seq->id} = $seq->seq();
print STDOUT "Loaded ". $seq->id() . "\n";
}
foreach my $key (sort keys %reports){
print STDOUT "Working on $key..\n";
#print STDOUT "Old seq: " . $scaffs{$key} . "\n";
my %edits = %{$reports{$key}};
foreach my $pos(sort by_num keys %edits){
print STDOUT "Working on position $pos..\n";
print STDOUT $edits{$pos}->{'Type'} . "\t" . $edits{$pos}->{'Old'} . "\t" . $edits{$pos}->{'New'} . "\n";
my $t = $edits{$pos}->{'Type'};
if($t =~ /subs/i){
#substitution
print STDOUT "Its a substitution!\n";
substr($scaffs{$key}, ($pos-1), 1) = $edits{$pos}->{'New'};
}
elsif($t =~ /delet/i){
#deletion!
print STDOUT "Its a deletion!\n";
substr($scaffs{$key}, ($pos-1), 1) = "";
}
elsif($t =~ /insertion/i){
#insertion
print STDOUT "Its an insertion!\n";
substr($scaffs{$key}, ($pos-1), 0) = $edits{$pos}->{'New'};
}
#print STDOUT "New seq: " . $scaffs{$key} . "\n";
}
}
#now output everything via a bio::seqio object...
my $outfile = $scaffile . "_shrimped.fna";
my $seqout = Bio::SeqIO->new(-file => ">$outfile", -format => 'Fasta');
foreach my $key (sort keys %scaffs){
print STDOUT "Outputting new version of $key\n";
my $seq = Bio::Seq->new(-id => $key, -seq => $scaffs{$key});
$seqout->write_seq($seq);
}