Add README link (#8)

* Added example folder with example files * Update README * update readme; change input to estimating-biomass * update readme * flake8 fixes * add est_biomass.csv to example folder * add est_biomass_output.csv to example folder * updated estimating_biomass test * flake8 fix * update unit testing * update unit tests * flake8 fixes * typo fix * typo fix * typo fix * typo fix * change unit test file paths * flake8 fix * fix filepath in tests * flake8 fixes * newer version of qiime2 links added * add readme linnk to fish microbiome paper * changes from code review * flake8 fixes * add DOI
biocore · Oct 17, 2022 · 4897535 · 4897535
1 parent 57e67de
commit 4897535
Show file tree

Hide file tree

Showing 3 changed files with 51 additions and 46 deletions.
diff --git a/README.md b/README.md
@@ -1,6 +1,9 @@
 # KatharoSeq
 
-An implementation of the KatharoSeq protocol, originally defined in [Minich et al 2018 mSystems](https://journals.asm.org/doi/10.1128/mSystems.00218-17).
+![](https://github.com/qiime2/q2templates/workflows/ci/badge.svg) [![DOI](https://zenodo.org/badge/459343122.svg)](https://zenodo.org/badge/latestdoi/459343122)
+
+
+An implementation of the KatharoSeq protocol, originally defined in [Minich et al 2018 mSystems](https://journals.asm.org/doi/10.1128/mSystems.00218-17) and in [Minich et al 2022](https://www.biorxiv.org/content/10.1101/2022.03.07.483203v1)
 
 ## Installation
 
@@ -16,6 +19,8 @@ pip install -e .
 
 Computation assumes that the user has classified their 16S features against SILVA, and that the `FeatureTable[Frequency]` has been collapsed to the genus level. Please see the [`q2-feature-classifier`](https://docs.qiime2.org/2022.2/plugins/available/feature-classifier/classify-sklearn/) for detail on how to perform taxonomy classification, and the [`q2-taxa`](https://docs.qiime2.org/2022.2/plugins/available/taxa/collapse/) plugin for information on collapsing to a taxonomic level. If you need more information on how to process your data, please refer to one of the relevant tutorials that can be found [here](https://docs.qiime2.org/2022.2/tutorials/). For these examples, data from the Fish Microbiome Project (FMP): [Fish microbiomes 101: disentangling the rules governing marine fish mucosal microbiomes across 101 species](https://www.biorxiv.org/content/10.1101/2022.03.07.483203v1) paper will be used, and can be found in the `example` folder.
 
+For a less stringent filter, an appropriate value for `--p-threshold` would be around 50. For a more strict filter, use a value like 90, as shown below in the example.
+
 
 ## Read Count Threshold
 
@@ -62,11 +67,11 @@ Finally in order to visualize the results from `estimating-biomass`, run `biomas
 ```
 qiime katharoseq biomass-plot \
     --i-table example/fmp_collapsed_table.qza \
-    --m-control-cell-extraction-file example/fmp_metadata_mod.tsv \
+    --m-control-cell-extraction-file example/fmp_metadata.tsv \
     --m-control-cell-extraction-column control_cell_into_extraction \
     --p-min-total-reads 1315 \
     --p-positive-control-value control \
-    --m-positive-control-column-file example/fmp_metadata_mod.tsv \
+    --m-positive-control-column-file example/fmp_metadata.tsv \
     --m-positive-control-column-column control_rct \
     --o-visualization biomass_plot_fmp
 ```

diff --git a/q2_katharoseq/_methods.py b/q2_katharoseq/_methods.py
@@ -14,7 +14,7 @@
         'd__Bacteria;p__Firmicutes;c__Clostridia;o__Clostridiales;'
         'f__Clostridiaceae;g__Clostridium',
         'd__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;'
-        'o__Enterobacterales;f__Enterobacteriaceae;__',
+        'o__Enterobacterales;f__Enterobacteriaceae;g__',
         'd__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;'
         'o__Enterobacterales;f__Enterobacteriaceae;'
         'g__Escherichia-Shigella',
@@ -66,6 +66,36 @@ def get_threshold(r1, r2, thresh):
     return min_freq
 
 
+def fit_lm(table,
+           min_total_reads,
+           positive_control_column,
+           positive_control_value,
+           control_cell_extraction):
+
+    total_reads = table.sum(axis=1)
+    filtered = pd.DataFrame(total_reads[total_reads > min_total_reads])
+    filtered = filtered.rename(columns={0: 'total_reads'})
+    filtered['log_total_reads'] = filtered.total_reads.apply(math.log10)
+
+    positive_control_column = positive_control_column.to_series().loc[
+        filtered.index]
+    positive_controls = positive_control_column[
+        positive_control_column == positive_control_value]
+    positive_controls = filtered.loc[positive_controls.index]
+
+    positive_controls['control_cell_extraction'] = \
+        control_cell_extraction.to_series().loc[positive_controls.index]
+    positive_controls['log_control_cell_extraction'] = \
+        positive_controls.control_cell_extraction.apply(math.log10)
+
+    lm = LinearRegression()
+    lm.fit(
+        positive_controls.log_total_reads.values.reshape(-1, 1),
+        positive_controls.log_control_cell_extraction)
+
+    return lm, filtered, positive_controls
+
+
 def read_count_threshold(
         output_dir: str,
         threshold: int,
@@ -169,26 +199,11 @@ def estimating_biomass(
         dna_extract_vol: int,
         extraction_mass_g: qiime2.NumericMetadataColumn) -> pd.DataFrame:
 
-    total_reads = table.sum(axis=1)
-    filtered = pd.DataFrame(total_reads[total_reads > min_total_reads])
-    filtered = filtered.rename(columns={0: 'total_reads'})
-    filtered['log_total_reads'] = filtered.total_reads.apply(math.log10)
-
-    positive_control_column = positive_control_column.to_series().loc[
-        filtered.index]
-    positive_controls = positive_control_column[
-        positive_control_column == positive_control_value]
-    positive_controls = filtered.loc[positive_controls.index]
-
-    positive_controls['control_cell_extraction'] = \
-        control_cell_extraction.to_series().loc[positive_controls.index]
-    positive_controls['log_control_cell_extraction'] = \
-        positive_controls.control_cell_extraction.apply(math.log10)
-
-    lm = LinearRegression()
-    lm.fit(
-        positive_controls.log_total_reads.values.reshape(-1, 1),
-        positive_controls.log_control_cell_extraction)
+    lm, filtered, positive_controls = fit_lm(table,
+                                             min_total_reads,
+                                             positive_control_column,
+                                             positive_control_value,
+                                             control_cell_extraction)
 
     filtered['estimated_biomass_per_pcrrxn'] = \
         10**((filtered.log_total_reads*lm.coef_[0])+lm.intercept_)
@@ -216,26 +231,11 @@ def biomass_plot(
         positive_control_value: str,
         positive_control_column: qiime2.CategoricalMetadataColumn) -> None:
 
-    total_reads = table.sum(axis=1)
-    filtered = pd.DataFrame(total_reads[total_reads > min_total_reads])
-    filtered = filtered.rename(columns={0: 'total_reads'})
-    filtered['log_total_reads'] = filtered.total_reads.apply(math.log10)
-
-    positive_control_column = positive_control_column.to_series().loc[
-        filtered.index]
-    positive_controls = positive_control_column[
-        positive_control_column == positive_control_value]
-    positive_controls = filtered.loc[positive_controls.index]
-
-    positive_controls['control_cell_extraction'] = \
-        control_cell_extraction.to_series().loc[positive_controls.index]
-    positive_controls['log_control_cell_extraction'] = \
-        positive_controls.control_cell_extraction.apply(math.log10)
-
-    lm = LinearRegression()
-    lm.fit(
-        positive_controls.log_total_reads.values.reshape(-1, 1),
-        positive_controls.log_control_cell_extraction)
+    lm, filtered, positive_controls = fit_lm(table,
+                                             min_total_reads,
+                                             positive_control_column,
+                                             positive_control_value,
+                                             control_cell_extraction)
 
     # MAKE PLOT
     y = positive_controls['log_control_cell_extraction']

diff --git a/q2_katharoseq/estimating_biomass_assets/index.html b/q2_katharoseq/estimating_biomass_assets/index.html
@@ -10,7 +10,7 @@ <h1>Katharoseq Protocol</h1>
           <a href="fit.svg" target="_blank" rel="noopener noreferrer" class="btn btn-default">
             Download SVG
           </a>
-        </div>
-  </div>
+    </div>
+</div>
 
 {% endblock %}