From f758c6989526e9a2b0a365f3ca8ae03775b7b97e Mon Sep 17 00:00:00 2001 From: Ben Woodcroft Date: Thu, 31 Oct 2024 06:25:59 +1000 Subject: [PATCH 01/11] Add pyproject.toml. Addresses the warning described at https://github.com/pypa/pip/issues/11457 --- pyproject.toml | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 pyproject.toml diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..97825da7 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,4 @@ +[build-system] +# XXX: If your project needs other packages to build properly, add them to this list. +requires = ["setuptools >= 64"] +build-backend = "setuptools.build_meta" From 7da6ec1dfc33c77dfa2e7c7bbe0607983fd8e5a9 Mon Sep 17 00:00:00 2001 From: Ben Woodcroft Date: Fri, 1 Nov 2024 15:19:56 +1000 Subject: [PATCH 02/11] config.yaml: Remove this extraneous file. --- .gitignore | 1 + config.yaml | 53 ----------------------------------------------------- 2 files changed, 1 insertion(+), 53 deletions(-) delete mode 100644 config.yaml diff --git a/.gitignore b/.gitignore index da6698c7..d63c82a3 100644 --- a/.gitignore +++ b/.gitignore @@ -10,3 +10,4 @@ aviary_genome.egg-info example/ test/data/.conda +config.yaml diff --git a/config.yaml b/config.yaml deleted file mode 100644 index f8b5a9db..00000000 --- a/config.yaml +++ /dev/null @@ -1,53 +0,0 @@ -%YAML 1.1 ---- -fasta: none -long_reads: none -long_read_type: ont -short_reads_1: none -short_reads_2: none -min_contig_size: 1500 -min_bin_size: 200000 -max_threads: 8 -pplacer_threads: 8 -max_memory: 250 -genome_size: 5000000 -reference_filter: none -strain_analysis: false -unassembled_long: none -mags: none -mag_directory: none -mag_extension: none -instrain_params: {} -profile_read_list: none -min_long_read_length: 500 -keep_percent: 99 -min_mean_q: 40 -guppy_model: r941_min_high_g360 -gtdbtk_folder: /bio_data/gtdbtk/release202/ -busco_folder: none -checkm2_db_folder: ~/databases/CheckM2_database/ -eggnog_folder: /bio_data/emapper/ -virsorter_data: none -email: somename@email.co -gsa: -- /home/rhys_newell_microba_com/git/aviary/none -gsa_mappings: none -semibin_model: global -kmer_sizes: -- auto -use_megahit: false -coassemble: true -min_cov_long: 20 -min_cov_short: 3 -exclude_contig_cov: 100 -exclude_contig_size: 25000 -long_contig_size: 100000 -use_checkm2_scores: false -previous_runs: none -min_completeness: none -max_contamination: none -ani: none -precluster_ani: none -precluster_method: none -pggb_params: none -tmpdir: none From 3c1c34d0a9734a6a34e327757e439bdf5c561072 Mon Sep 17 00:00:00 2001 From: Ben Woodcroft Date: Sat, 2 Nov 2024 21:51:59 +1000 Subject: [PATCH 03/11] assemble/recover: Require users to specify coassembly or not. Fixes #219. --- .gitignore | 1 + aviary/aviary.py | 1 - aviary/modules/processor.py | 12 ++++++++++-- 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index da6698c7..5e1d3d8a 100644 --- a/.gitignore +++ b/.gitignore @@ -10,3 +10,4 @@ aviary_genome.egg-info example/ test/data/.conda +test/data/wgsim.metaspades.assembly.fna.fai diff --git a/aviary/aviary.py b/aviary/aviary.py index 54d6d966..82274dfe 100755 --- a/aviary/aviary.py +++ b/aviary/aviary.py @@ -796,7 +796,6 @@ def main(): nargs='?', const=True, dest='coassemble', - default=False, ) assemble_group.add_argument( diff --git a/aviary/modules/processor.py b/aviary/modules/processor.py index a29bcf9a..324ed380 100644 --- a/aviary/modules/processor.py +++ b/aviary/modules/processor.py @@ -233,7 +233,6 @@ def __init__(self, self.pe2 = 'none' self.short_percent_identity = 'none' - try: self.kmer_sizes = args.kmer_sizes self.use_megahit = args.use_megahit @@ -246,7 +245,7 @@ def __init__(self, except AttributeError: self.kmer_sizes = ['auto'] self.use_megahit = False - self.coassemble = True + self.coassemble = False self.min_cov_long = 20 self.min_cov_short = 3 self.exclude_contig_cov = 100 @@ -346,6 +345,15 @@ def make_config(self): with open(template_conf_file) as template_config: conf = yaml.load(template_config) + + if self.assembly == 'none' or self.assembly is None: + # Check if coassembly or not needs to be specified by the user. + if self.coassemble is None: + if len(self.pe1) > 1 or len(self.longreads) > 1: + logging.error("Multiple readsets detected. Either specify '--coassemble' for coassembly of or '--coassemble no'.") + sys.exit(-1) + if self.coassemble is None: + self.coassemble = False # ensure that something is specified so that the config file is well formed if self.assembly != "none" and self.assembly is not None: self.assembly = list(dict.fromkeys([os.path.abspath(p) for p in self.assembly])) From 2288aeb9c5901a0b4c7812ee405ce9552ce4995c Mon Sep 17 00:00:00 2001 From: Ben Woodcroft Date: Sat, 2 Nov 2024 21:57:09 +1000 Subject: [PATCH 04/11] --coassemble: Improve help message. --- aviary/aviary.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/aviary/aviary.py b/aviary/aviary.py index 82274dfe..76161943 100755 --- a/aviary/aviary.py +++ b/aviary/aviary.py @@ -790,12 +790,12 @@ def main(): assemble_group.add_argument( '--coassemble', '--co-assemble', '--co_assemble', help='Specifies whether or not, when given multiple input reads, to coassemble them. \n' - 'If False, Aviary will use the first set of short reads and first set of long reads to perform assembly \n' + 'If False (no), Aviary will use the first set of short reads and first set of long reads to perform assembly \n' 'All read files will still be used during the MAG recovery process for differential coverage.', type=str2bool, nargs='?', const=True, - dest='coassemble', + metavar='yes|no', ) assemble_group.add_argument( From e2ccfeb8a048958dc01d84f4e9e544460de6406d Mon Sep 17 00:00:00 2001 From: Ben Woodcroft Date: Sat, 2 Nov 2024 22:05:52 +1000 Subject: [PATCH 05/11] --coassemble: Fix regression. --- aviary/modules/processor.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/aviary/modules/processor.py b/aviary/modules/processor.py index 324ed380..6de61027 100644 --- a/aviary/modules/processor.py +++ b/aviary/modules/processor.py @@ -349,7 +349,8 @@ def make_config(self): if self.assembly == 'none' or self.assembly is None: # Check if coassembly or not needs to be specified by the user. if self.coassemble is None: - if len(self.pe1) > 1 or len(self.longreads) > 1: + if (self.pe1 != 'none' and len(self.pe1) > 1) or \ + (self.longreads != 'none' and len(self.longreads) > 1): logging.error("Multiple readsets detected. Either specify '--coassemble' for coassembly of or '--coassemble no'.") sys.exit(-1) if self.coassemble is None: From 1aceac00e8dccd95cbf41122c83eac2b8eff6197 Mon Sep 17 00:00:00 2001 From: AroneyS Date: Fri, 8 Nov 2024 08:36:21 +1000 Subject: [PATCH 06/11] increase base vamb runtime to 48h --- aviary/modules/binning/binning.smk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aviary/modules/binning/binning.smk b/aviary/modules/binning/binning.smk index ed8593b2..182c3a3c 100644 --- a/aviary/modules/binning/binning.smk +++ b/aviary/modules/binning/binning.smk @@ -178,7 +178,7 @@ rule vamb: config["max_threads"] resources: mem_mb = lambda wildcards, attempt: min(int(config["max_memory"])*1024, 128*1024*attempt), - runtime = lambda wildcards, attempt: 24*60*attempt, + runtime = lambda wildcards, attempt: 48*60*attempt, gpus = 1 if config["request_gpu"] else 0 output: "data/vamb_bins/done" From adf75ba9a20ac49be6c3e182a4d761f2a9f453ec Mon Sep 17 00:00:00 2001 From: Ben J Woodcroft Date: Fri, 8 Nov 2024 11:57:20 +1000 Subject: [PATCH 07/11] actions: test-aviary: Attempt fix. (#221) (#225) * actions: test-aviary: Attempt fix. * actions: test-aviary: Attempt fix 2. --- .github/workflows/test-aviary.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test-aviary.yml b/.github/workflows/test-aviary.yml index 54930740..22031c9a 100644 --- a/.github/workflows/test-aviary.yml +++ b/.github/workflows/test-aviary.yml @@ -12,14 +12,13 @@ jobs: matrix: python-version: ["3.10"] steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Set up environment with Python ${{ matrix.python-version }} - uses: conda-incubator/setup-miniconda@v2 + uses: conda-incubator/setup-miniconda@v3 with: activate-environment: test environment-file: aviary.yml python-version: ${{ matrix.python-version }} - miniforge-variant: Mambaforge auto-activate-base: false channels: conda-forge,bioconda channel-priority: true From 4787d7dcf670d85fca3e3c73a44b1008b83c3753 Mon Sep 17 00:00:00 2001 From: AroneyS Date: Thu, 14 Nov 2024 13:58:40 +1000 Subject: [PATCH 08/11] fix vamb bins being included in dastool --- aviary/modules/binning/scripts/das_tool.py | 3 +- test/test_integration.py | 36 ++++++++++++++++++++++ 2 files changed, 38 insertions(+), 1 deletion(-) diff --git a/aviary/modules/binning/scripts/das_tool.py b/aviary/modules/binning/scripts/das_tool.py index 1b4186a3..e8fabfe5 100644 --- a/aviary/modules/binning/scripts/das_tool.py +++ b/aviary/modules/binning/scripts/das_tool.py @@ -19,7 +19,8 @@ binners = [] for (binner, extension) in unrefined_binners_to_use: if binner not in snakemake.config['skip_binners']: - binners.append((f'{binner}_bins/', extension, f'data/{binner}_bins.tsv')) + extra = 'bins/' if binner == 'vamb' else '' + binners.append((f'{binner}_bins/'+extra, extension, f'data/{binner}_bins.tsv')) for (binner, extension) in refined_binners_to_use: if binner not in snakemake.config['skip_binners']: diff --git a/test/test_integration.py b/test/test_integration.py index fe17f77f..c3cb232b 100755 --- a/test/test_integration.py +++ b/test/test_integration.py @@ -156,6 +156,42 @@ def test_short_read_recovery_fast(self): self.assertFalse(os.path.isfile(f"{output_dir}/aviary_out/data/final_contigs.fasta")) + def test_short_read_recovery_vamb(self): + output_dir = os.path.join("example", "test_short_read_recovery_vamb") + self.setup_output_dir(output_dir) + + # Create inflated assembly file + cmd = f"cat {data}/assembly.fasta > {output_dir}/assembly.fasta" + multiplier = 100 + for i in range(multiplier): + cmd += f" && awk '/^>/ {{print $0 \"{i}\"}} !/^>/ {{print $0}}' {data}/assembly.fasta >> {output_dir}/assembly.fasta" + + subprocess.run(cmd, shell=True, check=True) + + cmd = ( + f"aviary recover " + f"--assembly {output_dir}/assembly.fasta " + f"-o {output_dir}/aviary_out " + f"-1 {data}/wgsim.1.fq.gz " + f"-2 {data}/wgsim.2.fq.gz " + f"--binning-only " + f"--skip-binners rosella semibin metabat " + f"--skip-qc " + f"--refinery-max-iterations 0 " + f"--conda-prefix {path_to_conda} " + f"-n 32 -t 32 " + ) + subprocess.run(cmd, shell=True, check=True) + + bin_info_path = f"{output_dir}/aviary_out/bins/bin_info.tsv" + self.assertTrue(os.path.isfile(bin_info_path)) + with open(bin_info_path) as f: + num_lines = sum(1 for _ in f) + self.assertTrue(num_lines > 2) + + self.assertFalse(os.path.isfile(f"{output_dir}/aviary_out/data/final_contigs.fasta")) + + @unittest.skip("Skipping test due to queue submission") def test_short_read_recovery_queue_submission(self): output_dir = os.path.join("example", "test_short_read_recovery_queue_submission") self.setup_output_dir(output_dir) From a2269e03a1cd5aaf625e9dc334b97dd7b0c2e48e Mon Sep 17 00:00:00 2001 From: Ben J Woodcroft Date: Fri, 15 Nov 2024 17:21:09 +1000 Subject: [PATCH 09/11] spades: Update to 4. (#216) * spades: Update to 4. Fixes #214. Reported by: @michoug. Suggested by: @AroneyS. * final_assembly: Upgrade unicycler to 0.5.1. * spades: Pin deps. Suggested by @rhysnewell. --- aviary/modules/assembly/envs/final_assembly.yaml | 4 ++-- aviary/modules/assembly/envs/spades.yaml | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/aviary/modules/assembly/envs/final_assembly.yaml b/aviary/modules/assembly/envs/final_assembly.yaml index 7b819ffc..e23f4be5 100644 --- a/aviary/modules/assembly/envs/final_assembly.yaml +++ b/aviary/modules/assembly/envs/final_assembly.yaml @@ -2,5 +2,5 @@ channels: - conda-forge - bioconda dependencies: - - unicycler = 0.4.8 - - samtools = 1.11 \ No newline at end of file + - unicycler = 0.5.1 + - samtools = 1.21 \ No newline at end of file diff --git a/aviary/modules/assembly/envs/spades.yaml b/aviary/modules/assembly/envs/spades.yaml index 93b2b5c8..9ba3c5c3 100644 --- a/aviary/modules/assembly/envs/spades.yaml +++ b/aviary/modules/assembly/envs/spades.yaml @@ -2,8 +2,8 @@ channels: - conda-forge - bioconda dependencies: - - python <= 3.9 - - spades = 3.15.4 + - python = 3.12.0 # Keep things consistent to aid debugging. + - spades = 4.0.0 - megahit = 1.2.9 - - pyyaml - - joblib + - pyyaml = 6.0.2 + - joblib = 1.4.2 From af6d3066dc9fbf09f4722d8d6e833f163fde9d11 Mon Sep 17 00:00:00 2001 From: Ben J Woodcroft Date: Wed, 20 Nov 2024 12:09:16 +1000 Subject: [PATCH 10/11] aviary: Update --build help message. (#222) --- aviary/aviary.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/aviary/aviary.py b/aviary/aviary.py index 54d6d966..8ab7c8ce 100755 --- a/aviary/aviary.py +++ b/aviary/aviary.py @@ -260,11 +260,13 @@ def main(): base_group.add_argument( '--build', - help='Build conda environments and then exits. Equivalent to \"--snakemake-cmds \'--conda-create-envs-only True \' \"', + help='Build conda environments necessary to run the pipeline, and then exit. Equivalent to "--snakemake-cmds \'--conda-create-envs-only True \' ". Other inputs should be specified as if running normally so that the right set of conda environments is built.', type=str2bool, nargs='?', const=True, dest='build', + metavar='yes|no', + default='no', ) base_group.add_argument( From 28c63a3f73c02527ef92756fa8a611c2602cf5e4 Mon Sep 17 00:00:00 2001 From: rhysnewell Date: Thu, 21 Nov 2024 00:06:09 +0000 Subject: [PATCH 11/11] chore: bump version --- aviary/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aviary/__init__.py b/aviary/__init__.py index 02d7f09f..80cbaf31 100644 --- a/aviary/__init__.py +++ b/aviary/__init__.py @@ -1,4 +1,4 @@ -__version__ = "0.9.2" +__version__ = "0.10.0" # CONSTANTS