added test for visualizer output

biocore · Feb 23, 2022 · b51f4a0 · b51f4a0
2 parents 774e5c9 + 78d7d6c
commit b51f4a0
Show file tree

Hide file tree

Showing 7 changed files with 112 additions and 9 deletions.
diff --git a/q2_katharoseq/_format.py b/q2_katharoseq/_format.py
@@ -0,0 +1,22 @@
+import qiime2.plugin.model as model
+
+
+STATS_HEADER = ['sample-id',  # sniff() compares a file's first line to this
+                'log_total_reads',
+                'estimated_biomass_per_pcrrxn',
+                'estimated_biomass_per_dnarxn',
+                'extraction_mass_g',
+                'estimated_cells_per_g',
+                'log_estimated_cells_per_g']
+
+
+class EstimatedBiomassFmt(model.TextFileFormat):
+    def sniff(self):
+        """Return True iff the file's first line is the STATS_HEADER row."""
+        with open(str(self)) as fh:  # make sure the handle is closed
+            hdr = fh.readline().strip().split(',')
+        return hdr == STATS_HEADER
+
+
+EstimatedBiomassDirFmt = model.SingleFileDirectoryFormat(
+    'EstimatedBiomassDirFmt', 'est_biomass.csv', EstimatedBiomassFmt)
diff --git a/q2_katharoseq/_methods.py b/q2_katharoseq/_methods.py
@@ -156,7 +156,7 @@ def estimating_biomass(
         positive_control_column: qiime2.CategoricalMetadataColumn,
         pcr_template_vol: int,
         dna_extract_vol: int,
-        extraction_mass_g: qiime2.CategoricalMetadataColumn) -> pd.DataFrame:
+        extraction_mass_g: qiime2.NumericMetadataColumn) -> pd.DataFrame:
 
     total_reads = total_reads.to_series()
     filtered = pd.DataFrame(total_reads[total_reads > min_total_reads])

diff --git a/q2_katharoseq/_transformer.py b/q2_katharoseq/_transformer.py
@@ -0,0 +1,16 @@
+import pandas as pd
+
+from .plugin_setup import plugin
+from ._format import EstimatedBiomassFmt
+
+
+@plugin.register_transformer
+def _1(data: pd.DataFrame) -> EstimatedBiomassFmt:  # DataFrame -> CSV file
+    ff = EstimatedBiomassFmt()  # new format instance to write into
+    data.to_csv(str(ff))  # str(ff) is used as the output path
+    return ff
+
+
+@plugin.register_transformer
+def _2(ff: EstimatedBiomassFmt) -> pd.DataFrame:  # CSV file -> DataFrame
+    return pd.read_csv(str(ff), index_col='sample-id')  # index by sample id
diff --git a/q2_katharoseq/_type.py b/q2_katharoseq/_type.py
@@ -0,0 +1,3 @@
+from qiime2.plugin import SemanticType
+
+EstimatedBiomass = SemanticType('EstimatedBiomass')
diff --git a/q2_katharoseq/plugin_setup.py b/q2_katharoseq/plugin_setup.py
@@ -1,8 +1,11 @@
+import importlib
 from qiime2.plugin import (Plugin, Citations, Str, Int,
                            MetadataColumn, Categorical, Numeric, Choices)
 from q2_types.feature_table import (FeatureTable, Frequency)
-from . import read_count_threshold
+from . import read_count_threshold, estimating_biomass
 import q2_katharoseq
+from q2_katharoseq._type import EstimatedBiomass
+from q2_katharoseq._format import EstimatedBiomassFmt, EstimatedBiomassDirFmt
 
 
 citations = Citations.load('citations.bib', package='q2_katharoseq')
@@ -19,6 +22,12 @@
 )
 
 
+plugin.register_formats(EstimatedBiomassFmt, EstimatedBiomassDirFmt)
+plugin.register_semantic_types(EstimatedBiomass)
+plugin.register_semantic_type_to_format(EstimatedBiomass,
+                                        artifact_format=EstimatedBiomassDirFmt)
+
+
 plugin.visualizers.register_function(
     function=read_count_threshold,
     inputs={
@@ -60,3 +69,55 @@
                 'positive signal in samples with as few as 50 to 500 cells.',
     citations=[citations['minich2018']]
 )
+
+
+plugin.methods.register_function(  # register estimating_biomass as a method
+    function=estimating_biomass,
+    inputs={},  # no artifact inputs; everything is passed as parameters
+    parameters={'total_reads': MetadataColumn[Numeric],
+                'control_cell_extraction': MetadataColumn[Numeric],
+                'positive_control_column': MetadataColumn[Categorical],
+                'positive_control_value': Str,
+                'extraction_mass_g': MetadataColumn[Numeric],
+                'min_total_reads': Int,
+                'pcr_template_vol': Int,
+                'dna_extract_vol': Int},
+    outputs=[('estimated_biomass', EstimatedBiomass)],
+    input_descriptions={},
+    parameter_descriptions={
+        'total_reads': 'The total sum of the reads or ASVs for each sample.',
+        'control_cell_extraction': (
+            'The estimated number of cells or genomes used as input to your '
+            'library prep. One may typically estimate this by determining the '
+            'total number of cells from a stock solution used to make '
+            'standard titrations. Each titration will have an estimated '
+            'number of microbial cells put into the extraction. The final '
+            'estimate will depend on the elution volume and the final volume '
+            'used into the library prep (e.g. 16S PCR).'),
+        'positive_control_column': (
+            'The column in the sample metadata that describes which samples '
+            'are and are not controls.'),
+        'positive_control_value': (
+            'The value in the control column that demarks which samples are '
+            'the positive controls.'),
+        'extraction_mass_g': (
+            'The column in the sample metadata that describes the sample '
+            '(e.g. stool, tissue, soil, etc) mass (in grams - typically '
+            'converted from mg)'),
+        'min_total_reads': 'The minimum threshold to apply.',
+        'pcr_template_vol': (
+            'The volume of DNA used as template in the '
+            'library prep (PCR reaction)'),
+        'dna_extract_vol': (
+            'The final elution volume used during DNA extraction')},
+    output_descriptions={
+        'estimated_biomass': (
+            'A dataframe containing the details on estimated biomass')
+        },
+    name='Estimate the biomass of samples using KatharoSeq controls.',
+    description='Estimate the biomass of samples using KatharoSeq controls.',
+    citations=[]  # NOTE(review): sibling visualizer cites minich2018; confirm
+)
+
+
+importlib.import_module('q2_katharoseq._transformer')
diff --git a/q2_katharoseq/tests/support_files/input_estimating_biomass.tsv b/q2_katharoseq/tests/support_files/input_estimating_biomass.tsv
@@ -1,4 +1,5 @@
 sample_name	total_reads	control_cell_into_extraction	extraction_mass_g	positive_control
+#q2:types	numeric	numeric	numeric	categorical
 13414.plate1.h9	4			False
 13414.plate1.b10	6			False
 13414.plate1.e12	6			False

diff --git a/q2_katharoseq/tests/test_method.py b/q2_katharoseq/tests/test_method.py
@@ -166,25 +166,25 @@ def test_threshold(self):
 
     def test_estimating_biomass(self):
         fp = join(dirname(abspath(getfile(currentframe()))), 'support_files')
+
         data = pd.read_csv(
             f'{fp}/input_estimating_biomass.tsv', sep='\t', dtype={
                 'sample_name': str, 'total_reads': float,
                 'control_cell_into_extraction': float,
                 'extraction_mass_g': float,
                 'positive_control': str})
 
+        data = qiime2.Metadata.load(f'{fp}/input_estimating_biomass.tsv')
+
         obs = estimating_biomass(
-            total_reads=qiime2.NumericMetadataColumn(data['total_reads']),
-            control_cell_extraction=qiime2.NumericMetadataColumn(
-                data['control_cell_into_extraction']),
+            total_reads=data.get_column('total_reads'),
+            control_cell_extraction=data.get_column('control_cell_into_extraction'),  # noqa
             min_total_reads=1150,
             positive_control_value='True',
-            positive_control_column=qiime2.CategoricalMetadataColumn(
-                data['positive_control']),
+            positive_control_column=data.get_column('positive_control'),
             pcr_template_vol=5,
             dna_extract_vol=60,
-            extraction_mass_g=qiime2.NumericMetadataColumn(
-                data['extraction_mass_g'])
+            extraction_mass_g=data.get_column('extraction_mass_g')
         )
         exp = pd.read_csv(
             f'{fp}/output_estimating_biomass.tsv', sep='\t', index_col=0)
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		from qiime2.plugin import SemanticType

		EstimatedBiomass = SemanticType('EstimatedBiomass')