forked from freebayes/freebayes
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfreebayes-parallel
executable file
·40 lines (36 loc) · 1.54 KB
/
freebayes-parallel
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
#!/bin/bash
#
# freebayes-parallel: run freebayes over the regions listed in a regions file,
# dispatching the per-region jobs across several processors, and emit a single
# merged VCF stream on stdout.
#
# Arguments:
#   $1   regions file (one region per line)
#   $2   number of processors to use
#   $3+  arguments passed through to freebayes

# At least three arguments are required: regions file, ncpus, and one or more
# freebayes arguments. The help text goes to stderr (and the script exits
# non-zero) so that a mistaken invocation such as `freebayes-parallel >out.vcf`
# does not silently write usage text into the VCF output.
if [ $# -lt 3 ]; then
    cat >&2 <<EOF
usage: $0 [regions file] [ncpus] [freebayes arguments]

Run freebayes in parallel over regions listed in regions file, using ncpus processors.
Will merge and sort output, producing a uniform VCF stream on stdout. Flags to freebayes
which would write to e.g. a particular file will obviously cause problems, so caution is
encouraged when using this script.

examples:

Run freebayes in parallel on 100000bp chunks of the ref (fasta_generate_regions.py is also
located in the scripts/ directory in the freebayes distribution). Use 36 threads.

 freebayes-parallel <(fasta_generate_regions.py ref.fa.fai 100000) 36 -f ref.fa aln.bam >out.vcf

Generate regions that are equal in terms of data content, and thus have lower variance
in runtime. This will yield better resource utilization.

 bamtools coverage -in aln.bam | coverage_to_regions.py ref.fa 500 >ref.fa.500.regions
 freebayes-parallel ref.fa.500.regions 36 -f ref.fa aln.bam >out.vcf

EOF
    exit 1
fi
# Make the script's exit status reflect a failure in ANY pipeline stage
# (a crashed freebayes job, an unreadable regions file, a missing vcflib tool)
# instead of only the status of the final vcfuniq.
set -o pipefail

# Positional arguments: regions file, processor count, then everything else is
# handed to freebayes unchanged.
regionsfile=$1
ncpus=$2
shift 2
command=("freebayes" "$@")

# Directory containing this script, used as a fallback base for the vcflib
# helper paths below.
script_dir=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)

# Resolve a vcflib helper given its relative path.
# The path is tried relative to the current working directory first (the
# historical behaviour), then relative to this script's directory so the script
# also works when invoked from elsewhere. If neither candidate is executable the
# relative path is returned unchanged, so the shell reports the missing tool.
resolve_vcflib_tool() {
    local rel=$1
    if [ ! -x "$rel" ] && [ -n "$script_dir" ] && [ -x "$script_dir/$rel" ]; then
        printf '%s\n' "$script_dir/$rel"
    else
        printf '%s\n' "$rel"
    fi
}

vcffirstheader=$(resolve_vcflib_tool ../vcflib/scripts/vcffirstheader)
vcfstreamsort=$(resolve_vcflib_tool ../vcflib/bin/vcfstreamsort)

# Iterate over regions using gnu parallel to dispatch jobs: each line of the
# regions file replaces {} and becomes the --region argument of one freebayes
# invocation. The regions file is fed by redirection rather than `cat`, so an
# unreadable file fails this stage (and, with pipefail, the whole script).
# -k and -j are passed to parallel as in the original; ncpus is forwarded as-is.
# NOTE(review): GNU parallel appears to re-parse the composed command through a
# shell, so freebayes arguments containing whitespace or shell metacharacters
# may need extra quoting by the caller -- confirm against the parallel manual.
parallel -k -j "$ncpus" "${command[@]}" --region {} < "$regionsfile" \
    | "$vcffirstheader" \
    | "$vcfstreamsort" -w 1000 \
    | vcfuniq # remove duplicates at region edges