-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
478c6e2
commit f5367fa
Showing
6 changed files
with
181 additions
and
52 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,17 +1,42 @@ | ||
#! /usr/bin/env python3 | ||
""" | ||
get_mr0_test_cases.py | ||
Creates fastq files with dummy scores from a folder containing fasta files. | ||
Author Loaiza, K. | ||
Comments Created: September, 2021. | ||
""" | ||
|
||
# Libraries | ||
import os | ||
from Bio import SeqIO | ||
|
||
# Folders | ||
in_dir = 'validation_seqs' | ||
out_dir = 'mr0_test_cases' | ||
os.mkdir(out_dir) | ||
|
||
# Create the new output folder | ||
if not os.path.exists(out_dir): | ||
os.mkdir(out_dir) | ||
# Remove the old output folder | ||
else: | ||
os.remove(out_dir) | ||
|
||
# Get fasta files | ||
files_list = os.listdir(in_dir) | ||
files_list = [file for file in files_list if file.endswith('.fasta')] | ||
|
||
for file in files_list: | ||
# Get filename without .fasta | ||
# In1538_MF974580.fasta > In1538_MF974580 | ||
name = os.path.splitext(file)[0] | ||
# Get filepath | ||
# validation_seqs/In1538_MF974580.fasta | ||
old_file = os.path.join(in_dir, file) | ||
for r in SeqIO.parse(old_file, "fasta"): | ||
r.letter_annotations["solexa_quality"] = [40] * len(r) | ||
# Open the fasta file | ||
for r in SeqIO.parse(old_file, 'fasta'): | ||
# Assign dummy quality scores | ||
r.letter_annotations['solexa_quality'] = [40] * len(r) | ||
# Get the new filepath | ||
new_file = os.path.join(out_dir, name) | ||
SeqIO.write(r, f"{new_file}.fastq", "fastq") | ||
# Save the fasta with quality scores (fastq) | ||
SeqIO.write(r, f'{new_file}.fastq', 'fastq') |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,27 +1,55 @@ | ||
#! /usr/bin/env python3 | ||
""" | ||
get_mr1_test_cases.py | ||
Creates fastq files with permutated reads from a folder containing fastq files. | ||
Author Loaiza, K. | ||
Comments Created: September, 2021. | ||
""" | ||
|
||
# Libraries | ||
import os | ||
import random | ||
import numpy as np | ||
from Bio import SeqIO | ||
|
||
# Folders | ||
in_dir = 'mr0_test_cases' | ||
out_dir = 'mr1_test_cases' | ||
os.mkdir(out_dir) | ||
|
||
# Create the new output folder | ||
if not os.path.exists(out_dir): | ||
os.mkdir(out_dir) | ||
# Remove the old output folder | ||
else: | ||
os.remove(out_dir) | ||
|
||
# Get fastq files | ||
files_list = os.listdir(in_dir) | ||
files_list = [file for file in files_list if file.endswith('.fastq')] | ||
|
||
for file in files_list: | ||
# Get filename without .fastq | ||
# In1538_MF974580.fastq > In1538_MF974580 | ||
name = os.path.splitext(file)[0] | ||
# Get filepath | ||
# mr0_test_cases/In1538_MF974580.fastq | ||
old_file = os.path.join(in_dir, file) | ||
for r in SeqIO.parse(old_file, "fastq"): | ||
# Open the fastq file | ||
for r in SeqIO.parse(old_file, 'fastq'): | ||
# Get the lenght of the fastq sequence | ||
P = len(r.seq) | ||
# Get I random indices | ||
I = 2 | ||
indices = np.sort(np.random.randint(P, size = I)) | ||
print(f'{r.id} size {P} split at {indices}') | ||
# Get the chunks of sequences | ||
s1 = r[:indices[0]] | ||
s2 = r[indices[0]+1:indices[1]] | ||
s3 = r[indices[1]+1:] | ||
# Randomly shuffle the chunks of sequences | ||
records = [s1, s2, s3] | ||
random.shuffle(records) | ||
# Get the new filepath | ||
new_file = os.path.join(out_dir, name) | ||
SeqIO.write(records, f"{new_file}.fastq", "fastq") | ||
# Save the chunks of sequences in one fastq file | ||
SeqIO.write(records, f'{new_file}.fastq', 'fastq') |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,26 +1,53 @@ | ||
#! /usr/bin/env python3 | ||
""" | ||
get_mr2_test_cases.py | ||
Creates fastq files with duplicated reads from a folder containing fastq files. | ||
Author Loaiza, K. | ||
Comments Created: September 11, 2021. | ||
""" | ||
|
||
# Libraries | ||
import os | ||
import numpy as np | ||
from Bio import SeqIO | ||
|
||
# Folders | ||
in_dir = 'mr0_test_cases' | ||
out_dir = 'mr2_test_cases' | ||
os.mkdir(out_dir) | ||
|
||
# Create the new output folder | ||
if not os.path.exists(out_dir): | ||
os.mkdir(out_dir) | ||
# Remove the old output folder | ||
else: | ||
os.remove(out_dir) | ||
|
||
# Get fastq files | ||
files_list = os.listdir(in_dir) | ||
files_list = [file for file in files_list if file.endswith('.fastq')] | ||
|
||
for file in files_list: | ||
# Get filename without .fastq | ||
# In1538_MF974580.fastq > In1538_MF974580 | ||
name = os.path.splitext(file)[0] | ||
# Get filepath | ||
# mr0_test_cases/In1538_MF974580.fastq | ||
old_file = os.path.join(in_dir, file) | ||
for r in SeqIO.parse(old_file, "fastq"): | ||
# Open the fastq file | ||
for r in SeqIO.parse(old_file, 'fastq'): | ||
# Get the lenght of the fastq sequence | ||
P = len(r.seq) | ||
# Get I random indices | ||
I = 1 | ||
indices = np.sort(np.random.randint(P, size = I)) | ||
print(f'{r.id} size {P} split at {indices}') | ||
# Get the chunks of sequences | ||
s1 = r[:indices[0]] | ||
s2 = r[indices[0]+1:] | ||
s3 = r[:indices[0]] | ||
s4 = r[indices[0]+1:] | ||
records = [s1, s2, s3, s4] | ||
# Get the new filepath | ||
new_file = os.path.join(out_dir, name) | ||
SeqIO.write(records, f"{new_file}.fastq", "fastq") | ||
# Save the chunks of sequences in one fastq file | ||
SeqIO.write(records, f'{new_file}.fastq', 'fastq') |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,66 +1,93 @@ | ||
#!/usr/bin/env python3 | ||
|
||
"""Group_intfinder.py | ||
This program runs intfinder for a folder containing many fastq files. | ||
Author K.Loaiza | ||
Comments Created: Thursday, April 18, 2020""" | ||
|
||
import argparse | ||
""" | ||
group_intfinder.py | ||
Runs intfinder for a folder containing fastq files. | ||
Author Loaiza, K. | ||
Comments Created: April, 2020. | ||
""" | ||
|
||
# Libraries | ||
import os | ||
import argparse | ||
import tempfile | ||
|
||
from subprocess import check_call # command tool | ||
|
||
# Menu | ||
parser = argparse.ArgumentParser() | ||
parser.add_argument('--in_dir', help='folder with input sequences') | ||
parser.add_argument('--out_dir', help='folder to place the results') | ||
|
||
|
||
# Functions | ||
def get_files(in_dir): | ||
"""Function that takes an input folder and returns a list of all the fasta | ||
files inside""" | ||
""" | ||
Function that takes an input folder and returns a list of all the fastq | ||
files inside | ||
""" | ||
# Get fastq files | ||
files_list = os.listdir(in_dir) | ||
files_list = [os.path.join(in_dir, file) for file in files_list if file.endswith('.fastq')] | ||
files_list = [os.path.join(in_dir, file) | ||
for file | ||
in files_list | ||
if file.endswith('.fastq')] | ||
return files_list | ||
|
||
|
||
def get_intfinder_cmd(fasta, out_dir): | ||
"""Function that takes a fasta and returns an intfinder command""" | ||
identity_value = 0.5 | ||
coverage_value = 0.5 | ||
depth_value = 0.5 | ||
intfinder_results_dir = os.path.join(out_dir, f'intfinder_results_{coverage_value}_{identity_value}_{depth_value}') | ||
if not os.path.exists(intfinder_results_dir): | ||
os.mkdir(intfinder_results_dir) | ||
|
||
intfinder_results_path = os.path.join(intfinder_results_dir, os.path.splitext(os.path.basename(fasta))[0]) | ||
if not os.path.exists(intfinder_results_path): | ||
os.mkdir(intfinder_results_path) | ||
|
||
intfinder_command = ("python3 intfinder/intfinder.py" | ||
f" -i {fasta}" | ||
f" -o {intfinder_results_path}" | ||
def get_intfinder_cmd(fastq, out_dir): | ||
""" | ||
Function that takes a fastq and returns an intfinder command | ||
""" | ||
# Values range 0-1 | ||
# For example 0.5 means 50% identity | ||
identity = 0.5 | ||
coverage = 0.5 | ||
depth = 0.5 | ||
|
||
# New folder name with the values used | ||
# intfinder_results_0.5_0.5_0.5 | ||
dir = f'intfinder_results_{coverage}_{identity}_{depth}' | ||
results_dir = os.path.join(out_dir, dir) | ||
# Create the new sub folder | ||
if not os.path.exists(results_dir): | ||
os.mkdir(results_dir) | ||
|
||
# Get the filename to use it as a dirname | ||
# In431_AY029772.fastq > In431_AY029772 | ||
dir = os.path.splitext(os.path.basename(fastq))[0] | ||
results_sub_dir = os.path.join(results_dir, dir) | ||
# Create the new sub sub folder | ||
if not os.path.exists(results_sub_dir): | ||
os.mkdir(results_sub_dir) | ||
|
||
# Get intfinder_db location | ||
db_loc = os.path.join(os.getcwd(), 'intfinder_db') | ||
|
||
# Create the intfinder command | ||
intfinder_cmd = ("python3 intfinder/intfinder.py" | ||
f" -i {fastq}" | ||
f" -o {results_sub_dir}" | ||
#f" -tmp {tmp_dir}" | ||
f" -mp /usr/bin/kma" | ||
f" -p /home/david/MyUnixWorkplace/intfinder_db" | ||
f" -l {coverage_value}" | ||
f" -t {identity_value}" | ||
f" -d {depth_value}" | ||
f" -p {db_loc}" | ||
f" -l {coverage}" | ||
f" -t {identity}" | ||
f" -d {depth}" | ||
f" -x" | ||
f" -q ") | ||
return intfinder_cmd, results_sub_dir | ||
|
||
return intfinder_command, intfinder_results_path | ||
|
||
|
||
# Main program | ||
if __name__ == '__main__': | ||
# Menu | ||
args = parser.parse_args() | ||
|
||
# Create the new output folder | ||
if not os.path.exists(args.out_dir): | ||
os.makedirs(args.out_dir) | ||
|
||
# Get fastq files | ||
files_list = get_files(args.in_dir) | ||
for fasta in files_list: | ||
intfinder_cmd, intfinder_results_path = get_intfinder_cmd(fasta, args.out_dir) | ||
# For each file get an intfinder command | ||
for fastq in files_list: | ||
intfinder_cmd, results_sub_dir = get_intfinder_cmd(fastq, args.out_dir) | ||
#print(intfinder_cmd) | ||
# Run the intfinder command on the shell | ||
check_call(intfinder_cmd, shell=True) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters