diff --git a/q2_katharoseq/__init__.py b/q2_katharoseq/__init__.py index 1e2da02..a395404 100644 --- a/q2_katharoseq/__init__.py +++ b/q2_katharoseq/__init__.py @@ -7,6 +7,7 @@ # ---------------------------------------------------------------------------- from . import _version from ._methods import read_count_threshold, estimating_biomass +from ._methods import biomass_plot __version__ = _version.get_versions()['version'] -__all__ = ['read_count_threshold', 'estimating_biomass'] +__all__ = ['read_count_threshold', 'estimating_biomass', 'biomass_plot'] diff --git a/q2_katharoseq/_methods.py b/q2_katharoseq/_methods.py index 3959cac..9e77e1f 100644 --- a/q2_katharoseq/_methods.py +++ b/q2_katharoseq/_methods.py @@ -191,6 +191,37 @@ def estimating_biomass( filtered['log_estimated_cells_per_g'] = \ filtered.estimated_cells_per_g.apply(math.log10) + return filtered + + +def biomass_plot( + output_dir: str, + total_reads: qiime2.NumericMetadataColumn, + control_cell_extraction: qiime2.NumericMetadataColumn, + min_total_reads: int, + positive_control_value: str, + positive_control_column: qiime2.CategoricalMetadataColumn) -> None: + + total_reads = total_reads.to_series() + filtered = pd.DataFrame(total_reads[total_reads > min_total_reads]) + filtered['log_total_reads'] = filtered.total_reads.apply(math.log10) + + positive_control_column = positive_control_column.to_series().loc[ + filtered.index] + positive_controls = positive_control_column[ + positive_control_column == positive_control_value] + positive_controls = filtered.loc[positive_controls.index] + + positive_controls['control_cell_extraction'] = \ + control_cell_extraction.to_series().loc[positive_controls.index] + positive_controls['log_control_cell_extraction'] = \ + positive_controls.control_cell_extraction.apply(math.log10) + + lm = LinearRegression() + lm.fit( + positive_controls.log_total_reads.values.reshape(-1, 1), + positive_controls.log_control_cell_extraction) + # MAKE PLOT y = positive_controls['log_control_cell_extraction'] x = positive_controls['log_total_reads'] @@ -198,7 +229,7 @@ def estimating_biomass( slope = lm.coef_[0] plt.clf() - plt.scatter(x,y, color='black') + plt.scatter(x, y, color='black') axes = plt.gca() x_vals = np.array(axes.get_xlim()) y_vals = intercept + slope * x_vals @@ -208,11 +239,8 @@ def estimating_biomass( plt.savefig(os.path.join(output_dir, 'fit.svg')) plt.close() - # VISUALIZER - context = {'r_squared': r_squared} + # VISUALIZER: git push origin visualizer TEMPLATES = pkg_resources.resource_filename( 'q2_katharoseq', 'estimating_biomass_assets') index = os.path.join(TEMPLATES, 'index.html') - q2templates.render(index, output_dir, context=context) - - return filtered + q2templates.render(index, output_dir) diff --git a/q2_katharoseq/plugin_setup.py b/q2_katharoseq/plugin_setup.py index 6c7d98f..f6afe23 100644 --- a/q2_katharoseq/plugin_setup.py +++ b/q2_katharoseq/plugin_setup.py @@ -2,7 +2,7 @@ from qiime2.plugin import (Plugin, Citations, Str, Int, MetadataColumn, Categorical, Numeric, Choices) from q2_types.feature_table import (FeatureTable, Frequency) -from . import read_count_threshold, estimating_biomass +from . import read_count_threshold, estimating_biomass, biomass_plot import q2_katharoseq from q2_katharoseq._type import EstimatedBiomass from q2_katharoseq._format import EstimatedBiomassFmt, EstimatedBiomassDirFmt @@ -67,10 +67,9 @@ description='KatharoSeq is high-throughput protocol combining laboratory ' 'and bioinformatic methods that can differentiate a true ' 'positive signal in samples with as few as 50 to 500 cells.', - citations=[citations['minich2018']] + citations=[] ) - plugin.methods.register_function( function=estimating_biomass, inputs={}, @@ -119,5 +118,37 @@ citations=[] ) +plugin.visualizers.register_function( + function=biomass_plot, + inputs={}, + parameters={'total_reads': MetadataColumn[Numeric], + 'control_cell_extraction': MetadataColumn[Numeric], + 'positive_control_column': MetadataColumn[Categorical], + 'positive_control_value': Str, + 'min_total_reads': Int + }, + input_descriptions={}, + parameter_descriptions={ + 'total_reads': 'The total sum of the reads or ASVs for each sample.', + 'control_cell_extraction': ( + 'The estimated number of cells or genomes used as input to your ' + 'library prep. One may typically estimate this by determining the ' + 'total number of cells from a stock solution used to make ' + 'standard titrations. Each titration will have an estimated ' + 'number of microbial cells put into the extraction. The final ' + 'estimate will depend on the elution volume and the final volume ' + 'used into the library prep (e.g. 16S PCR).'), + 'positive_control_column': ( + 'The column in the sample metadata that describes which samples ' + 'are and are not controls.'), + 'positive_control_value': ( + 'The value in the control column that demarks which samples are ' + 'the positive controls.'), + 'min_total_reads': 'The minimum threshold to apply.', + }, + name='Plot the results of estimating_biomass.', + description='Plot the results of estimating_biomass.', + citations=[] +) importlib.import_module('q2_katharoseq._transformer') diff --git a/q2_katharoseq/tests/test_method.py b/q2_katharoseq/tests/test_method.py index a9b6f05..cfe56b3 100644 --- a/q2_katharoseq/tests/test_method.py +++ b/q2_katharoseq/tests/test_method.py @@ -7,7 +7,9 @@ from qiime2 import CategoricalMetadataColumn from qiime2 import NumericMetadataColumn -from q2_katharoseq import read_count_threshold, estimating_biomass +from q2_katharoseq import (read_count_threshold, + estimating_biomass, + biomass_plot) from q2_katharoseq._methods import allosteric_sigmoid from q2_katharoseq._methods import get_threshold @@ -151,29 +153,22 @@ def test_no_positive_controls_in_table(self): self.control) def test_sigmoid(self): - x = 1 - h = 2 - k_prime = 3 + x = 1.0 + h = 2.0 + k_prime = 3.0 a = allosteric_sigmoid(x, h, k_prime) self.assertTrue(a == .25) def test_threshold(self): - r1 = 2 - r2 = 3 - thresh = 50 + r1 = [3.5, 2.3, 1.3, 3.4] + r2 = [1.1, 2.2, 1.7, 2.3] + thresh = 50.0 min_freq = get_threshold(r1, r2, thresh) - self.assertTrue(min_freq == 37) + self.assertTrue(min_freq == 1) def test_estimating_biomass(self): fp = join(dirname(abspath(getfile(currentframe()))), 'support_files') - data = pd.read_csv( - f'{fp}/input_estimating_biomass.tsv', sep='\t', dtype={ - 'sample_name': str, 'total_reads': float, - 'control_cell_into_extraction': float, - 'extraction_mass_g': float, - 'positive_control': str}) - data = qiime2.Metadata.load(f'{fp}/input_estimating_biomass.tsv') obs = estimating_biomass( @@ -189,9 +184,25 @@ def test_estimating_biomass(self): exp = pd.read_csv( f'{fp}/output_estimating_biomass.tsv', sep='\t', index_col=0) pd.testing.assert_frame_equal(obs, exp) - index_fp = os.path.join(output_dir, 'index.html') - self.assertTrue(os.path.exists(index_fp)) + def test_biomass_plot(self): + fp = join(dirname(abspath(getfile(currentframe()))), 'support_files') + + data = qiime2.Metadata.load(f'{fp}/input_estimating_biomass.tsv') + + with tempfile.TemporaryDirectory() as output_dir: + biomass_plot( + output_dir, + total_reads=data.get_column('total_reads'), + control_cell_extraction=data.get_column( + 'control_cell_into_extraction'), # noqa + min_total_reads=1150, + positive_control_value='True', + positive_control_column=data.get_column('positive_control') + ) + + index_fp = os.path.join(output_dir, 'index.html') + self.assertTrue(os.path.exists(index_fp)) if __name__ == '__main__':