Skip to content

Commit

Permalink
Merge pull request #11 from joshuakirsch/Improve-documentation-and-fi…
Browse files Browse the repository at this point in the history
…x-error-message

Improve documentation and fix error message
  • Loading branch information
joshuakirsch authored Nov 8, 2024
2 parents 1122412 + 5e0ef5e commit 6a16e7a
Showing 1 changed file with 10 additions and 9 deletions.
19 changes: 10 additions & 9 deletions pseudoR_multi-reference.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,16 @@ mkdir ref
max_threads=${threads}
wd=$(pwd)


#build filter contigs, predict orfs, build bowtie database
cat ../${reference}/*${contig_ending} > ref/allContigs.fa
seqkit seq ref/allContigs.fa -m 1000 > ref/contigs.1k.fa
python ${database}/pprodigal.py -i ref/contigs.1k.fa -p meta -d ref/orfs.nucl.fa -T ${max_threads}
dedupe.sh in=ref/orfs.nucl.fa out=ref/orfs.nucl.dedupe.fa minidentity=90 overwrite=true threads=${max_threads}
seqkit fx2tab -n -i -l ref/orfs.nucl.dedupe.fa > ref/orfs.nucl.dedupe.lengths.txt
bowtie2-build ref/orfs.nucl.dedupe.fa ref/orfs.nucl --threads ${max_threads} -q --large-index

#build file with location of all ORFs and deduplicated ORFs
seqkit fx2tab -n ref/orfs.nucl.dedupe.fa > temp/orf_temp.txt
cut -f1 -d " " temp/orf_temp.txt > temp/temp_name_1.txt
cut -f3 -d " " temp/orf_temp.txt > temp/temp_name_2.txt
Expand Down Expand Up @@ -73,15 +76,12 @@ bowtie2 -x ref/contigs -p ${max_threads} -1 temp/read1.fq \
samtools view -Sb -@ {max_threads} temp/temp.sam > temp/temp1.bam
samtools sort -@ ${max_threads} -m 4G -o mapping_files/${i}.contig.reads_mapped.bam temp/temp1.bam

#samtools fastq -f 4 -N -@ ${max_threads} -s temp/unmapped.s.fq -1 temp/unmapped.1.fq -2 temp/unmapped.2.fq mapping_files/${i}.contig.reads_mapped.bam > blank.txt
#cat temp/unmapped.s.fq temp/unmapped.1.fq temp/unmapped.2.fq > mapping_files/${i}.unmapped.fq
#reformat.sh in=temp/temp.sam out=mapping_files/${i}.unmapped.fq unmappedonly=t overwrite=true primaryonly=t

samtools fastq -f 4 temp/temp.sam > mapping_files/${i}.unmapped.fq

seqkit replace -p .+ -r "seq_{nr}" mapping_files/${i}.unmapped.fq > mapping_files/${i}.unmapped.numbered.fq
seqkit fq2fa mapping_files/${i}.unmapped.numbered.fq > unmapped.fa

#blast unmapped reads against IS termini database
mkdir blast_results

seqkit split2 -p ${max_threads} --force unmapped.fa
Expand All @@ -92,10 +92,12 @@ cat unmapped_list.txt | xargs -P${max_threads} -n1 -I% blastn -db ${IR_database}

cat blast_results/* > mapping_files/${i}_blast_results.txt


#trim off parts of reads with IS termini
cut -f1 mapping_files/${i}_blast_results.txt | seqkit grep -f - mapping_files/${i}.unmapped.numbered.fq | seqkit fx2tab > blast_results_reads.tsv
timeout 20m Rscript --vanilla ${database}/V9_analysis.R ${i}
seqkit tab2fx ${i}_trimmed_reads.tsv > trimmed_reads.fq
#re-map trimmed reads against contigs

bowtie2 -x ref/contigs --very-sensitive -p ${max_threads} -U trimmed_reads.fq | samtools view -@ 5 -Sb -o mapping_files/${i}.unmapped.contig_mapping.bam
samtools view -F 4 mapping_files/${i}.unmapped.contig_mapping.bam > mapping_files/${i}.unmapped.contig_mapping.filtered.sam

Expand All @@ -105,9 +107,7 @@ rm -r blast_results
rm -r unmapped.fa.split

#map to ORFs and do IS trimming/mapping [use previously found mapping reads and unmapped reads]
#samtools fastq -F 4 -N -@ ${max_threads} -s temp/mapped.s.fq -1 temp/mapped.1.fq -2 temp/mapped.2.fq mapping_files/${i}.contig.reads_mapped.bam > blank.txt
#repair.sh in=temp/mapped.1.fq in2=temp/mapped.2.fq out1=temp/mapped.repaired.1.fq out2=temp/mapped.repaired.2.fq overwrite=true
#reformat.sh in=temp/temp.sam out=temp/mapped.fastq mappedonly=t overwrite=true primaryonly=t

samtools fastq -F 4 temp/temp.sam > temp/mapped.fastq

repair.sh in=temp/mapped.fastq out1=temp/mapped.repaired.1.fq out2=temp/mapped.repaired.2.fq outs=temp/mapped.s.fq overwrite=true
Expand All @@ -123,10 +123,11 @@ seqkit tab2fx ${i}_trimmed_reads.tsv > trimmed_reads.fq
bowtie2 -x ref/orfs.nucl --very-sensitive -p ${max_threads} -U trimmed_reads.fq | samtools view -@ 5 -Sb -o mapping_files/${i}.unmapped.orf_mapping.bam
samtools view -F 4 mapping_files/${i}.unmapped.orf_mapping.bam > mapping_files/${i}.unmapped.orf_mapping.filtered.sam

cut -f3 mapping_files/${i}.orf_mapping.filtered.sam | sort | uniq > mapping_files/${i}.contig_IS_hits.txt
cut -f3 mapping_files/${i}.unmapped.orf_mapping.filtered.sam | sort | uniq > mapping_files/${i}.contig_IS_hits.txt


done
#run post-mapping processes to generate output data

bash ${database}/post-analysis_V3.sh -s ../${sraList} -d ${database}
Rscript --vanilla ${database}/Final_Output.multiReference.R ../${sraList} final_results/contig_analysis.step1.tsv ${database} contig
Expand Down

0 comments on commit 6a16e7a

Please sign in to comment.