-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfa-reorient.pl
executable file
·95 lines (77 loc) · 3.07 KB
/
fa-reorient.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
#!/usr/bin/env perl
use strict;
use Bio::SeqIO;
use Bio::SearchIO;
use File::Temp qw(tempdir);
my(@Options, $verbose, $ref, $input, $append, $blastn, $evalue);
setOptions();
-r $ref or die "Please supply a reference FASTA file with --ref / -r";
-r $input or die "Please supply an input FASTA file with --input / -i";
my $tmpdir = tempdir(CLEANUP => 1);
my $cmd = "formatdb -i '$ref' -p F -n $tmpdir/reference -l /dev/null";
print STDERR "Running: $cmd\n";
system($cmd);
my %flip;
$cmd = $blastn ? "blastall -p blastn" : "megablast";
$cmd .= " -i '$input' -d $tmpdir/reference -m 8 -v 1 -b 1 -e $evalue";
print STDERR "Running: $cmd\n";
my $bls = Bio::SearchIO->new(-file=>"$cmd |", -format=>'blasttable') or die $!;
while (my $res = $bls->next_result) {
HIT:
while (my $hit = $res->next_hit) {
while (my $hsp = $hit->next_hsp) {
die "query strand should be +1 always!" if $hsp->strand('query') != 1;
# next unless $hsp->significance < 1E-6; # done in $cmd above with -e option
my $strand = $hsp->strand('hit');
$flip{ $res->query_name } = ( $hsp->strand('hit') != $hsp->strand('query') );
last HIT; # take 1st decent hit we get
}
}
}
my $in = Bio::SeqIO->new(-file=>$input, -format=>'Fasta');
my $out = Bio::SeqIO->new(-fh=>\*STDOUT, -format=>'Fasta');
my $flipped = 0;
my $nread = 0;
while (my $seq = $in->next_seq) {
if ($flip{$seq->display_id}) {
$flipped++;
$seq = $seq->revcom;
$seq->display_id( $seq->display_id . $append ) if $append;
}
$out->write_seq($seq);
$nread++;
}
print STDERR "Re-oriented $flipped of $nread sequences.";
print STDERR " Appended '$append' to IDs." if $append;
print STDERR "\n";
#----------------------------------------------------------------------
# Option setting routines
sub setOptions {
use Getopt::Long;
@Options = (
{OPT=>"help", VAR=>\&usage, DESC=>"This help"},
{OPT=>"v|verbose!", VAR=>\$verbose, DEFAULT=>0, DESC=>"Verbose"},
{OPT=>"r|ref=s", VAR=>\$ref, DEFAULT=>'', DESC=>"Reference sequence(s) [FASTA]"},
{OPT=>"i|input=s", VAR=>\$input, DEFAULT=>'', DESC=>"Input file [FASTA]"},
{OPT=>"a|append=s", VAR=>\$append, DEFAULT=>'', DESC=>"Append this to ID if re-oriented"},
{OPT=>"b|blastn!", VAR=>\$blastn, DEFAULT=>0, DESC=>"Use BLASTN instead of MEGA-BLAST"},
{OPT=>"e|evalue=f", VAR=>\$evalue, DEFAULT=>1E-6, DESC=>"BLAST cutoff"},
);
(!@ARGV) && (usage());
&GetOptions(map {$_->{OPT}, $_->{VAR}} @Options) || usage();
# Now setup default values.
foreach (@Options) {
if (defined($_->{DEFAULT}) && !defined(${$_->{VAR}})) {
${$_->{VAR}} = $_->{DEFAULT};
}
}
}
sub usage {
print "Usage: $0 [options] --ref reference.fa --input messy.fa [--append c] > reoriented.fasta\n";
foreach (@Options) {
printf " --%-13s %s%s.\n",$_->{OPT},$_->{DESC},
defined($_->{DEFAULT}) ? " (default '$_->{DEFAULT}')" : "";
}
exit(1);
}
#----------------------------------------------------------------------