From fd2d484853461f48ec428a7a535d3cfce5eb5ed9 Mon Sep 17 00:00:00 2001 From: Daniel McDonald Date: Tue, 16 Jan 2024 15:36:21 -0800 Subject: [PATCH] API: allow specifying an ASV to use as a control --- q2_katharoseq/_methods.py | 16 +++++++++++++--- q2_katharoseq/plugin_setup.py | 5 +++++ q2_katharoseq/tests/test_method.py | 15 +++++++++++++++ 3 files changed, 33 insertions(+), 3 deletions(-) diff --git a/q2_katharoseq/_methods.py b/q2_katharoseq/_methods.py index c641c11..d0070d0 100644 --- a/q2_katharoseq/_methods.py +++ b/q2_katharoseq/_methods.py @@ -45,7 +45,8 @@ 'o__Rhodobacterales;f__Rhodobacteraceae;g__Paracoccus'], 'single': [ 'd__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;' - 'o__Burkholderiales;f__Comamonadaceae;g__Variovorax'] + 'o__Burkholderiales;f__Comamonadaceae;g__Variovorax'], + 'asv': '' } @@ -103,7 +104,13 @@ def read_count_threshold( positive_control_column: qiime2.CategoricalMetadataColumn, cell_count_column: qiime2.NumericMetadataColumn, table: pd.DataFrame, - control: str) -> None: + control: str, + asv: str=None) -> None: + if control == 'asv': + if asv is None: + raise ValueError("Control type set to asv but no asv provided") + if asv not in table.columns: + raise ValueError("asv not found in the feature table") # CONVERSIONS positive_control_column = positive_control_column.to_series() @@ -146,7 +153,10 @@ def read_count_threshold( df['asv_reads'] = df.sum(axis=1) # NUMBER READS ALIGNING TO MOCK COMMUNITY INPUT - df['control_reads'] = df[control_type[control]].sum(axis=1) + if control == 'asv': + df['control_reads'] = df[asv] + else: + df['control_reads'] = df[control_type[control]].sum(axis=1) # PERCENT CORRECTLY ASSIGNED df['correct_assign'] = df['control_reads'] / df['asv_reads'] diff --git a/q2_katharoseq/plugin_setup.py b/q2_katharoseq/plugin_setup.py index 7b6106f..a7a3492 100644 --- a/q2_katharoseq/plugin_setup.py +++ b/q2_katharoseq/plugin_setup.py @@ -40,6 +40,7 @@ 'positive_control_value': Str, 'positive_control_column': MetadataColumn[Categorical], 'cell_count_column': MetadataColumn[Numeric], + 'asv': Str, }, input_descriptions={ 'table': ( @@ -63,6 +64,10 @@ 'The column in the sample metadata that describes which samples ' 'are and are not controls.' ), + 'asv': ( + 'Specify an exact ASV to use for a control. If the features are ' + 'hashed, please use the feature hash' + ), }, name='Methods for the application of the KatharoSeq protocol', description='KatharoSeq is high-throughput protocol combining laboratory ' diff --git a/q2_katharoseq/tests/test_method.py b/q2_katharoseq/tests/test_method.py index 17b6891..bc313dd 100644 --- a/q2_katharoseq/tests/test_method.py +++ b/q2_katharoseq/tests/test_method.py @@ -58,6 +58,21 @@ def setUp(self): folder = '../../example' self.fp = join(dirname(abspath(getfile(currentframe()))), folder) + def test_specify_asv_as_control(self): + with tempfile.TemporaryDirectory() as output_dir: + read_count_threshold( + output_dir, + self.threshold, + self.positive_control_value, + self.positive_control_column, + self.cell_count_column, + self.table, + 'asv', + 'f4') + + index_fp = os.path.join(output_dir, 'index.html') + self.assertTrue(os.path.exists(index_fp)) + def test_outputs_index(self): with tempfile.TemporaryDirectory() as output_dir: read_count_threshold(