# Code for handling sequences
# Create a new environment called rosetta with Intel (osx-64) packages.
CONDA_SUBDIR=osx-64 conda create -n rosetta python
conda activate rosetta
# Print the machine architecture that this environment's Python reports.
python -c "import platform;print(platform.machine())"
# Make sure that conda commands in this environment use Intel packages.
conda config --env --set subdir osx-64
# For every *.fasta file, replace each header line with ">" followed by the
# file name minus its last six characters (the ".fasta" suffix); all other
# lines are printed unchanged.
awk '/^>/{print ">" substr(FILENAME,1,length(FILENAME)-6); next} 1' *.fasta
# Summary statistics for a FASTA file (-a: all statistics).
seqkit stats -a reads.fasta
# Convert FASTQ to FASTA.
seqkit fq2fa L6_Illumina_Trimmed.fastq -o L6_Illumina_Trimmed.fasta
# Rename every record to seq_<record number>; reads from stdin because no
# input file is given. The pattern is quoted so the shell passes it verbatim.
seqkit replace -p '.+' -r "seq_{nr}"
# Reverse-complement a DNA sequence (-r reverse, -p complement).
seqkit seq CN3704.fasta -r -p -t DNA > CN3704_UKHSA_rev.fasta
# Remove gaps from sequences (-g).
seqkit seq -g NAME.fasta > NEW_NAME.fasta
# Extract the records matching the patterns listed in yagA.txt.
seqkit grep -f yagA.txt pangenome_reference.fa -o yagA_Reads.fasta
# Annotate a VCF file with the depth of reference and alternative reads for each SNP (forward and reverse for each):
# medaka annotate with --dpsp; the positional arguments are presumably the
# input VCF, the reference FASTA, the BAM of aligned reads and the output VCF,
# in that order — confirm with `medaka tools annotate --help`.
medaka tools annotate --dpsp ./medaka/medaka.sorted.vcf L10_Stock_Assembly.fasta ./medaka/calls_to_ref.bam ./medaka/L10_Broth_SNPs.vcf
# Trim paired-end reads with fastp: -i/-I are the R1/R2 inputs and -o/-O the
# R1/R2 outputs; -f/-F 20 trim 20 bases from the front and -t/-T 5 trim 5 bases
# from the tail of read 1/read 2; -g enables polyG tail trimming.
fastp -i Sp_clone_S2_S73_R1_001.fastq -I Sp_clone_S2_S73_R2_001.fastq -g -f 20 -F 20 -t 5 -T 5 -o S2_R1.fastq -O S2_R2.fastq
# Report coverage statistics for an alignment file.
samtools coverage NAME.bam
# Annotate the assembly with Prokka, writing results to the L5_Stock directory.
prokka --outdir L5_Stock --genus Bordetella --species pertussis --strain L5_Stock --centre XXX L5_Stock_final.fasta
# Build a nucleotide BLAST database from the FASTA file named in $file.
# $file must be set beforehand; it is quoted so paths containing spaces or
# glob characters are passed as a single argument, and an unset/empty value
# fails with a clear message.
makeblastdb -in "${file:?set file to the FASTA path first}" -parse_seqids -dbtype nucl
#!/bin/bash
# Loop for making each genome into a database.
# `files` is an array so that several paths (or a glob such as
# ./Raw_Reads/*.fasta) can be listed without word-splitting problems.
files=(./Raw_Reads/SRR12168673.fasta)
for file in "${files[@]}"; do
  makeblastdb -in "$file" -parse_seqids -dbtype nucl
done
# Loop for conducting BLAST search in each database.
# The trailing backslashes keep all options on one logical blastn command;
# without them each option line would be executed as a separate command.
# Results are written next to each database as
# <database>.BLAST.Result.First.txt in tabular format with the subject strand
# appended ("6 std sstrand").
for database in ./Raw_Reads/SRR12168673.fasta; do
  blastn \
    -query First_40_IS481.txt \
    -db "$database" \
    -max_target_seqs 50000 \
    -outfmt "6 std sstrand" \
    -out "$database.BLAST.Result.First.txt"
done