Skip to content

Commit

Permalink
Switched to python3
Browse files Browse the repository at this point in the history
Changes to parallel execution (possibly unstable)
Removed StringIO
Removed Bio.Alphabet
Removed basestring
Removed long
Removed izip
Updated requirements.txt
Added ete3 database patch
Added newer refseq assembly summary
  • Loading branch information
AlphaSquad committed Oct 5, 2020
1 parent 04c47a7 commit 32d15e0
Show file tree
Hide file tree
Showing 35 changed files with 243,473 additions and 842 deletions.
17 changes: 8 additions & 9 deletions anonymizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import sys
import io
import math
import StringIO
import argparse
from Bio import SeqIO
from scripts.Validator.sequencevalidator import SequenceValidator
Expand Down Expand Up @@ -52,18 +51,18 @@ def anonymize_sequences(
@param output_stream: Output stream of anonymous fasta format data
@type output_stream: file | io.FileIO | StringIO.StringIO
@param sequence_prefix: Prefix of the anonymous sequence id.
@type sequence_prefix: basestring
@type sequence_prefix: str
@param file_format: Fasta format of input and output. Either 'fasta' or 'fastq'.
@type file_format: basestring
@type file_format: str
@return: None
@rtype: None
"""
assert self.is_stream(input_stream)
assert self.is_stream(output_stream)
assert self.is_stream(mapping)
assert isinstance(sequence_prefix, basestring)
assert isinstance(file_format, basestring)
assert isinstance(sequence_prefix, str)
assert isinstance(file_format, str)
file_format = file_format.lower()
assert file_format in self._legal_formats

Expand Down Expand Up @@ -92,18 +91,18 @@ def anonymize_sequence_pairs(
@param output_stream: Output stream of anonymous fasta format data
@type output_stream: file | io.FileIO | StringIO.StringIO | None
@param sequence_prefix: Prefix of the anonymous sequence id.
@type sequence_prefix: basestring
@type sequence_prefix: str
@param file_format: Fasta format of input and output. Either 'fasta' or 'fastq'.
@type file_format: basestring
@type file_format: str
@return: None
@rtype: None
"""
assert self.is_stream(input_stream)
assert self.is_stream(output_stream)
assert self.is_stream(mapping)
assert isinstance(sequence_prefix, basestring)
assert isinstance(file_format, basestring)
assert isinstance(sequence_prefix, str)
assert isinstance(file_format, str)
file_format = file_format.lower()
assert file_format in self._legal_formats

Expand Down
37 changes: 18 additions & 19 deletions fastaanonymizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
import os
import io
import random
import StringIO
import tempfile
import subprocess
from scripts.Validator.sequencevalidator import SequenceValidator
Expand Down Expand Up @@ -61,8 +60,8 @@ def __init__(self, logfile=None, verbose=True, debug=False, seed=None, tmp_dir=N
"""
assert isinstance(verbose, bool)
assert isinstance(debug, bool)
assert seed is None or isinstance(seed, (long, int, float, basestring))
assert tmp_dir is None or isinstance(tmp_dir, basestring)
assert seed is None or isinstance(seed, (long, int, float, str))
assert tmp_dir is None or isinstance(tmp_dir, str)
if tmp_dir is not None:
assert self.validate_dir(tmp_dir)
else:
Expand Down Expand Up @@ -114,17 +113,17 @@ def get_command(
@return: System command line
@rtype: str
"""
assert isinstance(path_input, basestring)
assert isinstance(path_input, str)
assert self.validate_dir(path_input, silent=True) or self.validate_file(path_input, silent=True)
assert isinstance(file_path_output, basestring)
assert isinstance(file_path_output, str)
assert self.validate_dir(file_path_output, only_parent=True)
assert isinstance(file_path_mapping, basestring)
assert isinstance(file_path_mapping, str)
assert self.validate_dir(file_path_mapping, only_parent=True)
assert isinstance(sequence_prefix, basestring)
assert isinstance(file_format, basestring)
assert isinstance(sequence_prefix, str)
assert isinstance(file_format, str)
file_format = file_format.lower()
assert file_format in self._legal_formats
assert file_extension is None or isinstance(file_extension, basestring)
assert file_extension is None or isinstance(file_extension, str)
assert isinstance(paired, bool)

# https://www.gnu.org/software/coreutils/manual/html_node/Random-sources.html#Random-sources
Expand Down Expand Up @@ -210,14 +209,14 @@ def shuffle_anonymize(
file_path_output = tempfile.mktemp(dir=self._tmp_dir)
if file_path_mapping is None:
file_path_mapping = tempfile.mktemp(dir=self._tmp_dir)
assert isinstance(path_input, basestring)
assert isinstance(path_input, str)
assert self.validate_dir(path_input, silent=True) or self.validate_file(path_input, silent=True)
assert isinstance(file_path_output, basestring)
assert isinstance(file_path_output, str)
assert self.validate_dir(file_path_output, only_parent=True)
assert isinstance(file_path_mapping, basestring)
assert isinstance(file_path_mapping, str)
assert self.validate_dir(file_path_mapping, only_parent=True)
assert isinstance(prefix, basestring)
assert isinstance(file_format, basestring)
assert isinstance(prefix, str)
assert isinstance(file_format, str)
file_format = file_format.lower()
assert file_format in self._legal_formats

Expand Down Expand Up @@ -266,14 +265,14 @@ def interweave_shuffle_anonymize(
file_path_output = tempfile.mktemp(dir=self._tmp_dir)
if file_path_mapping is None:
file_path_mapping = tempfile.mktemp(dir=self._tmp_dir)
assert isinstance(path_input, basestring)
assert isinstance(path_input, str)
assert self.validate_dir(path_input, silent=True) or self.validate_file(path_input, silent=True)
assert isinstance(file_path_output, basestring)
assert isinstance(file_path_output, str)
assert self.validate_dir(file_path_output, only_parent=True)
assert isinstance(file_path_mapping, basestring)
assert isinstance(file_path_mapping, str)
assert self.validate_dir(file_path_mapping, only_parent=True)
assert isinstance(prefix, basestring)
assert isinstance(file_format, basestring)
assert isinstance(prefix, str)
assert isinstance(file_format, str)
file_format = file_format.lower()
assert file_format in self._legal_formats

Expand Down
47 changes: 23 additions & 24 deletions fastastreamer.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
import os
import io
import errno
import StringIO
import itertools
import argparse
from Bio import SeqIO
Expand Down Expand Up @@ -47,27 +46,27 @@ def stream_directory(self, directory, out_stream=sys.stdout, file_format="fastq"
@attention:
@param directory: A directory
@type directory: basestring
@type directory: str
@param out_stream: A stream the output will be written to.
@type out_stream: file | io.FileIO | StringIO.StringIO
@param file_format: Fasta format of input and output. Either 'fasta' or 'fastq'.
@type file_format: basestring
@type file_format: str
@param extension: file extension to be filtered for
@type extension: basestring
@type extension: str
@param paired: sequences are streamed interweaved from a pair of files if True, else consecutively
@type paired: bool
@return: None
@rtype: None
"""
assert isinstance(directory, basestring)
assert isinstance(directory, str)
directory = FastaStreamer.get_full_path(directory)
assert self.validate_dir(directory)
assert self.is_stream(out_stream)
assert isinstance(file_format, basestring)
assert isinstance(file_format, str)
file_format = file_format.lower()
assert file_format in self._legal_formats
assert extension is None or isinstance(extension, basestring)
assert extension is None or isinstance(extension, str)

list_of_file = self.get_files_in_directory(directory, extension=extension)
if not paired:
Expand All @@ -82,22 +81,22 @@ def stream_file(self, file_path, out_stream=sys.stdout, file_format="fastq", pai
@attention:
@param file_path: A file path
@type file_path: basestring
@type file_path: str
@param out_stream: A stream the output will be written to.
@type out_stream: file | io.FileIO | StringIO.StringIO
@param file_format: Fasta format of input and output. Either 'fasta' or 'fastq'.
@type file_format: basestring
@type file_format: str
@param paired: sequences are streamed as pair, else one by one
@type paired: bool
@return: None
@rtype: None
"""
assert isinstance(file_path, basestring)
assert isinstance(file_path, str)
file_path = FastaStreamer.get_full_path(file_path)
assert self.validate_file(file_path)
assert self.is_stream(out_stream)
assert isinstance(file_format, basestring)
assert isinstance(file_format, str)
file_format = file_format.lower()
assert file_format in self._legal_formats

Expand All @@ -110,23 +109,23 @@ def consecutive_stream(self, src, out_stream=sys.stdout, file_format="fasta", pa
@attention:
@param src: A file path or list of file paths
@type src: basestring | list[basestring]
@type src: str | list[str]
@param out_stream: A stream the output will be written to.
@type out_stream: file | io.FileIO | StringIO.StringIO
@param file_format: Fasta format of input and output. Either 'fasta' or 'fastq'.
@type file_format: basestring
@type file_format: str
@return: None
@rtype: None
"""
assert isinstance(src, (basestring, list))
assert isinstance(src, (str, list))
assert self.is_stream(out_stream)
assert isinstance(file_format, basestring)
assert isinstance(file_format, str)
file_format = file_format.lower()
assert file_format in self._legal_formats

list_of_file_paths = None
if isinstance(src, basestring):
if isinstance(src, str):
assert self.validate_file(src)
list_of_file_paths = [src]
elif isinstance(src, list):
Expand Down Expand Up @@ -169,26 +168,26 @@ def interweave_stream(self, src, out_stream=sys.stdout, file_format="fasta", ext
@attention:
@param src: A file path or list of file paths
@type src: basestring | list[basestring]
@type src: str | list[str]
@param out_stream: A stream the output will be written to.
@type out_stream: file | io.FileIO | StringIO.StringIO
@param file_format: Fasta format of input and output. Either 'fasta' or 'fastq'.
@type file_format: basestring
@type file_format: str
@param extension: file extension to be filtered for
@type extension: basestring
@type extension: str
@return: None
@rtype: None
"""
assert isinstance(src, (basestring, list))
assert isinstance(src, (str, list))
assert self.is_stream(out_stream)
assert isinstance(file_format, basestring)
assert isinstance(file_format, str)
file_format = file_format.lower()
assert file_format in self._legal_formats
assert isinstance(extension, basestring)
assert isinstance(extension, str)

list_of_file_paths = None
if isinstance(src, basestring):
if isinstance(src, str):
assert self.validate_file(src)
list_of_file_paths = [src]
elif isinstance(src, list):
Expand All @@ -204,7 +203,7 @@ def interweave_stream(self, src, out_stream=sys.stdout, file_format="fasta", ext
file_path_one = file_path
file_path_second = file_path[:file_path.rfind(file_path_one_suffix)] + file_path_second_suffix

for seq_record_f, seq_record_b in itertools.izip_longest(SeqIO.parse(file_path_one, file_format), SeqIO.parse(file_path_second, file_format)):
for seq_record_f, seq_record_b in itertools.zip_longest(SeqIO.parse(file_path_one, file_format), SeqIO.parse(file_path_second, file_format)):
if seq_record_f is None or seq_record_b is None:
msg = "forward and backward file have an unequal amount of sequences:\n"
msg += "forward: '{}'\nbackward: '{}'\n".format(file_path_one, file_path_second)
Expand Down
2 changes: 1 addition & 1 deletion genomeannotation.py
Original file line number Diff line number Diff line change
Expand Up @@ -345,7 +345,7 @@ def create_meta_table(self, file_path_metadata_table):

if __name__ == "__main__":
pipeline = GenomeAnnotation(
args=None, version=__version__, separator="\t",
args=None, separator="\t",
column_name_genome_id="genome_ID", column_name_otu="OTU", column_name_novelty_category="novelty_category",
column_name_ncbi="NCBI_ID")
pipeline.my_main()
7 changes: 2 additions & 5 deletions metagenome_from_profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,7 @@
from scripts.Validator.validator import Validator
from scripts.configfilehandler import ConfigFileHandler
from scripts.loggingwrapper import LoggingWrapper as logger
try:
from configparser import ConfigParser
except ImportError:
from ConfigParser import ConfigParser # ver. < 3.0
from configparser import ConfigParser
import scripts.get_genomes as GG
import shutil
import os
Expand Down Expand Up @@ -81,7 +78,7 @@ def create_config(args,cfg):
if args.seed is not None:
config.set('Main', "seed", args.seed)
name = os.path.join(args.o,"config.ini")
with open(name,'wb') as cfg_path:
with open(name,'w+') as cfg_path:
config.write(cfg_path)
return name

Expand Down
2 changes: 1 addition & 1 deletion metagenomesimulation.py
Original file line number Diff line number Diff line change
Expand Up @@ -838,7 +838,7 @@ def _compress_data(self):
pipeline = None
try:
pipeline = MetagenomeSimulation(
args=None, version=__version__, separator="\t",
args=None, separator="\t",
column_name_genome_id="genome_ID", column_name_otu="OTU", column_name_novelty_category="novelty_category",
column_name_ncbi="NCBI_ID", column_name_source="source")
except (KeyboardInterrupt, SystemExit, Exception, ValueError, RuntimeError) as e:
Expand Down
10 changes: 5 additions & 5 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
numpy==1.13.0
biopython==1.69.0
matplotlib==2.0.2
biom-format==2.1.6
ete2==2.3.10
numpy
biopython
matplotlib
biom-format
ete3
8 changes: 4 additions & 4 deletions scripts/Archive/archive.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import io
import zipfile
import tarfile
from compress import Compress
from .compress import Compress


class Archive(Compress):
Expand Down Expand Up @@ -47,15 +47,15 @@ def __init__(self, default_compression="gz", logfile=None, verbose=True):
@param default_compression: default compression used for files
@type default_compression: str | unicode
@param logfile: file handler or file path to a log file
@type logfile: file | io.FileIO | StringIO.StringIO | basestring
@type logfile: file | io.FileIO | StringIO.StringIO | str
@param verbose: Not verbose means that only warnings and errors will be passed to the stream
@type verbose: bool
@return: None
@rtype: None
"""
assert logfile is None or isinstance(logfile, basestring) or self.is_stream(logfile)
assert isinstance(default_compression, basestring), "separator must be string"
assert logfile is None or isinstance(logfile, str) or self.is_stream(logfile)
assert isinstance(default_compression, str), "separator must be string"
assert isinstance(verbose, bool), "verbose must be true or false"
assert default_compression.lower() in self._open, "Unknown compression: '{}'".format(default_compression)

Expand Down
9 changes: 4 additions & 5 deletions scripts/Archive/compress.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@

import os
import io
import StringIO
import time
import datetime
from scripts.Validator.validator import Validator
Expand Down Expand Up @@ -44,7 +43,7 @@ def __init__(self, default_compression="gz", label="Compress", logfile=None, ver
@param default_compression: default compression used for files
@type default_compression: str | unicode
@param logfile: file handler or file path to a log file
@type logfile: file | io.FileIO | StringIO.StringIO | basestring
@type logfile: file | io.FileIO | StringIO.StringIO | str
@param verbose: Not verbose means that only warnings and errors will be passed to the stream
@type verbose: bool
@param debug: Display debug messages
Expand All @@ -53,8 +52,8 @@ def __init__(self, default_compression="gz", label="Compress", logfile=None, ver
@return: None
@rtype: None
"""
assert logfile is None or isinstance(logfile, basestring) or self.is_stream(logfile)
assert isinstance(default_compression, basestring), "separator must be string"
assert logfile is None or isinstance(logfile, str) or self.is_stream(logfile)
assert isinstance(default_compression, str), "separator must be string"
assert isinstance(verbose, bool), "verbose must be true or false"
assert default_compression.lower() in self._open, "Unknown compression: '{}'".format(default_compression)

Expand All @@ -76,7 +75,7 @@ def get_compression_type(self, file_path):
@return: compression type, None if no compression
@rtype: str | None
"""
assert isinstance(file_path, basestring)
assert isinstance(file_path, str)
filename, extension = os.path.splitext(file_path)

if extension == ".zip" and not zipfile.is_zipfile(file_path):
Expand Down
Loading

0 comments on commit 32d15e0

Please sign in to comment.