Skip to content

Commit

Permalink
Merge pull request #4 from biocore/visualizer
Browse files Browse the repository at this point in the history
adding a visualizer to estimating_biomass
  • Loading branch information
wasade authored Mar 1, 2022
2 parents fcba4b4 + 6f02570 commit 3c47f32
Show file tree
Hide file tree
Showing 4 changed files with 98 additions and 27 deletions.
3 changes: 2 additions & 1 deletion q2_katharoseq/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
# ----------------------------------------------------------------------------
from . import _version
from ._methods import read_count_threshold, estimating_biomass
from ._methods import biomass_plot

__version__ = _version.get_versions()['version']
__all__ = ['read_count_threshold', 'estimating_biomass']
__all__ = ['read_count_threshold', 'estimating_biomass', 'biomass_plot']
40 changes: 34 additions & 6 deletions q2_katharoseq/_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,14 +191,45 @@ def estimating_biomass(
filtered['log_estimated_cells_per_g'] = \
filtered.estimated_cells_per_g.apply(math.log10)

return filtered


def biomass_plot(
output_dir: str,
total_reads: qiime2.NumericMetadataColumn,
control_cell_extraction: qiime2.NumericMetadataColumn,
min_total_reads: int,
positive_control_value: str,
positive_control_column: qiime2.CategoricalMetadataColumn) -> None:

total_reads = total_reads.to_series()
filtered = pd.DataFrame(total_reads[total_reads > min_total_reads])
filtered['log_total_reads'] = filtered.total_reads.apply(math.log10)

positive_control_column = positive_control_column.to_series().loc[
filtered.index]
positive_controls = positive_control_column[
positive_control_column == positive_control_value]
positive_controls = filtered.loc[positive_controls.index]

positive_controls['control_cell_extraction'] = \
control_cell_extraction.to_series().loc[positive_controls.index]
positive_controls['log_control_cell_extraction'] = \
positive_controls.control_cell_extraction.apply(math.log10)

lm = LinearRegression()
lm.fit(
positive_controls.log_total_reads.values.reshape(-1, 1),
positive_controls.log_control_cell_extraction)

# MAKE PLOT
y = positive_controls['log_control_cell_extraction']
x = positive_controls['log_total_reads']
intercept = lm.intercept_
slope = lm.coef_[0]

plt.clf()
plt.scatter(x,y, color='black')
plt.scatter(x, y, color='black')
axes = plt.gca()
x_vals = np.array(axes.get_xlim())
y_vals = intercept + slope * x_vals
Expand All @@ -208,11 +239,8 @@ def estimating_biomass(
plt.savefig(os.path.join(output_dir, 'fit.svg'))
plt.close()

# VISUALIZER
context = {'r_squared': r_squared}
# VISUALIZER: git push origin visualizer
TEMPLATES = pkg_resources.resource_filename(
'q2_katharoseq', 'estimating_biomass_assets')
index = os.path.join(TEMPLATES, 'index.html')
q2templates.render(index, output_dir, context=context)

return filtered
q2templates.render(index, output_dir)
37 changes: 34 additions & 3 deletions q2_katharoseq/plugin_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from qiime2.plugin import (Plugin, Citations, Str, Int,
MetadataColumn, Categorical, Numeric, Choices)
from q2_types.feature_table import (FeatureTable, Frequency)
from . import read_count_threshold, estimating_biomass
from . import read_count_threshold, estimating_biomass, biomass_plot
import q2_katharoseq
from q2_katharoseq._type import EstimatedBiomass
from q2_katharoseq._format import EstimatedBiomassFmt, EstimatedBiomassDirFmt
Expand Down Expand Up @@ -67,10 +67,9 @@
description='KatharoSeq is high-throughput protocol combining laboratory '
'and bioinformatic methods that can differentiate a true '
'positive signal in samples with as few as 50 to 500 cells.',
citations=[citations['minich2018']]
citations=[]
)


plugin.methods.register_function(
function=estimating_biomass,
inputs={},
Expand Down Expand Up @@ -119,5 +118,37 @@
citations=[]
)

plugin.visualizers.register_function(
function=biomass_plot,
inputs={},
parameters={'total_reads': MetadataColumn[Numeric],
'control_cell_extraction': MetadataColumn[Numeric],
'positive_control_column': MetadataColumn[Categorical],
'positive_control_value': Str,
'min_total_reads': Int
},
input_descriptions={},
parameter_descriptions={
'total_reads': 'The total sum of the reads or ASVs for each sample.',
'control_cell_extraction': (
'The estimated number of cells or genomes used as input to your '
'library prep. One may typically estimate this by determining the '
'total number of cells from a stock solution used to make '
'standard titrations. Each titration will have an estimated '
'number of microbial cells put into the extraction. The final '
'estimate will depend on the elution volume and the final volume '
'used into the library prep (e.g. 16S PCR).'),
'positive_control_column': (
'The column in the sample metadata that describes which samples '
'are and are not controls.'),
'positive_control_value': (
'The value in the control column that demarks which samples are '
'the positive controls.'),
'min_total_reads': 'The minimum threshold to apply.',
},
name='Plot the results of estimating_biomass.',
description='Plot the results of estimating_biomass.',
citations=[]
)

importlib.import_module('q2_katharoseq._transformer')
45 changes: 28 additions & 17 deletions q2_katharoseq/tests/test_method.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@
from qiime2 import CategoricalMetadataColumn
from qiime2 import NumericMetadataColumn

from q2_katharoseq import read_count_threshold, estimating_biomass
from q2_katharoseq import (read_count_threshold,
estimating_biomass,
biomass_plot)
from q2_katharoseq._methods import allosteric_sigmoid
from q2_katharoseq._methods import get_threshold

Expand Down Expand Up @@ -151,29 +153,22 @@ def test_no_positive_controls_in_table(self):
self.control)

def test_sigmoid(self):
x = 1
h = 2
k_prime = 3
x = 1.0
h = 2.0
k_prime = 3.0
a = allosteric_sigmoid(x, h, k_prime)
self.assertTrue(a == .25)

def test_threshold(self):
r1 = 2
r2 = 3
thresh = 50
r1 = [3.5, 2.3, 1.3, 3.4]
r2 = [1.1, 2.2, 1.7, 2.3]
thresh = 50.0
min_freq = get_threshold(r1, r2, thresh)
self.assertTrue(min_freq == 37)
self.assertTrue(min_freq == 1)

def test_estimating_biomass(self):
fp = join(dirname(abspath(getfile(currentframe()))), 'support_files')

data = pd.read_csv(
f'{fp}/input_estimating_biomass.tsv', sep='\t', dtype={
'sample_name': str, 'total_reads': float,
'control_cell_into_extraction': float,
'extraction_mass_g': float,
'positive_control': str})

data = qiime2.Metadata.load(f'{fp}/input_estimating_biomass.tsv')

obs = estimating_biomass(
Expand All @@ -189,9 +184,25 @@ def test_estimating_biomass(self):
exp = pd.read_csv(
f'{fp}/output_estimating_biomass.tsv', sep='\t', index_col=0)
pd.testing.assert_frame_equal(obs, exp)
index_fp = os.path.join(output_dir, 'index.html')
self.assertTrue(os.path.exists(index_fp))

def test_biomass_plot(self):
fp = join(dirname(abspath(getfile(currentframe()))), 'support_files')

data = qiime2.Metadata.load(f'{fp}/input_estimating_biomass.tsv')

with tempfile.TemporaryDirectory() as output_dir:
biomass_plot(
output_dir,
total_reads=data.get_column('total_reads'),
control_cell_extraction=data.get_column(
'control_cell_into_extraction'), # noqa
min_total_reads=1150,
positive_control_value='True',
positive_control_column=data.get_column('positive_control')
)

index_fp = os.path.join(output_dir, 'index.html')
self.assertTrue(os.path.exists(index_fp))


if __name__ == '__main__':
Expand Down

0 comments on commit 3c47f32

Please sign in to comment.