fix bugs: add missing imports and correct undefined names (file_name, head, rankby_ab, dispcmd)

idriskb · Feb 4, 2021 · 9cabb61 · 9cabb61
1 parent d20df73
commit 9cabb61
Show file tree

Hide file tree

Showing 14 changed files with 39 additions and 17 deletions.
diff --git a/episcanpy/api/__init__.py b/episcanpy/api/__init__.py
@@ -1,5 +1,5 @@
+import scanpy as sc
 from anndata import AnnData
-
 from anndata import read as read_h5ad
 from anndata import read_csv, read_excel, read_hdf, read_loom, read_mtx, read_text, read_umi_tools
 

diff --git a/episcanpy/count_matrix/_atac_mtx.py b/episcanpy/count_matrix/_atac_mtx.py
@@ -183,7 +183,8 @@ def save_sparse_mtx(initial_matrix, output_file='.h5ad', path='', omic='ATAC', b
     Parameters
     ----------
 
-    initial_matrix: initial dense count matrix to load and convert into a sparse matrix 
+    initial_matrix: initial dense count matrix to load and convert into a sparse matrix. 
+    If bed = True,  initial_matrix should be the path to the bed file.
 
     output_file: name of the output file for the AnnData object.
     Default output is the name of the input file with .h5ad extension
@@ -210,7 +211,7 @@ def save_sparse_mtx(initial_matrix, output_file='.h5ad', path='', omic='ATAC', b
 
     # choice between 2 different input count matrix formats
     if bed == True:
-        adata = read_mtx_bed(file_name, path, omic)
+        adata = read_mtx_bed(initial_matrix, path, omic)
     else:
         # reading the non sparse file
         with open(path+initial_matrix) as f:
@@ -245,4 +246,4 @@ def save_sparse_mtx(initial_matrix, output_file='.h5ad', path='', omic='ATAC', b
 
         adata.write(output_file)
 
-    return(adata)
+    return(adata)
diff --git a/episcanpy/count_matrix/_bld_met_mtx.py b/episcanpy/count_matrix/_bld_met_mtx.py
@@ -112,13 +112,15 @@ def extract_methylation(sample_name, feature, meth_type=None, path='', #head=HEA
     feature:
     meth_type:
         CG, CH or not specified
-    head: if there is header that you don't want to read. An annotation in the
-        file you are reading. The default value is the Methylpy/Ecker header
     path: path to the file of the sample you want to read.
     chromosome: chromosomes of the species you are considering. default value
         is the human genome (including mitochondrial and sex chromosomes)
     cell_names is the list of cells you want to put as annotation at the beginning of your file
     """
+    #old parameter
+    #head: if there is header that you don't want to read. An annotation in the
+    #file you are reading. The default value is the Methylpy/Ecker header
+
     # It corresponds to the annotation in the methylation call for methylpy.
     # (at least for the Ecker dataset)
     # I specify it here so I can automatically skip the line when I am reading the
@@ -149,7 +151,8 @@ def extract_methylation(sample_name, feature, meth_type=None, path='', #head=HEA
     	reduced_cyt = read_meth_fileCH(sample_name, path, chromosome, pos, met, tot, status)
     else:
     	# print a warning saying that the argument is not valid. We take all cytosines
-    	reduced_cyt = read_meth_file(sample_name, head, path, chromosome)
+    	#reduced_cyt = read_meth_file(sample_name, head, path, chromosome)
+    	reduced_cyt = read_meth_file(sample_name, path, chromosome)
 
     final_output = methylation_level(reduced_cyt, feature, chromosome, threshold)
     if write:

diff --git a/episcanpy/count_matrix/_features.py b/episcanpy/count_matrix/_features.py
@@ -1,3 +1,6 @@
+import numpy as np
+import matplotlib.pyplot as plt
+
 # chromosomes for 2 principal species. If you work with another genome
 # the chromosomes will have to be specified
 # mitochondrial genome not included

diff --git a/episcanpy/count_matrix/_load_input_file.py b/episcanpy/count_matrix/_load_input_file.py
@@ -1,3 +1,5 @@
+from . import extract_CG
+from . import extract_CH
 def read_cyt_summary(sample_name, meth_type, head, path, chromosome):
     """
     Read file from which you want to extract the methylation level and

diff --git a/episcanpy/count_matrix/_read_meth_file.py b/episcanpy/count_matrix/_read_meth_file.py
@@ -1,3 +1,6 @@
+from . import extract_CG
+from . import extract_CH
+
 def read_methylation_file(sample_name, meth_type, head, path, chromosome):
     """
     Read file from which you want to extract the methylation level and

diff --git a/episcanpy/plotting/_silhouette.py b/episcanpy/plotting/_silhouette.py
@@ -5,7 +5,7 @@
 import matplotlib.cm as cm
 import numpy as np
 
-def silhouette(adata_name, cluster_annot, key=None,
+def silhouette(adata_name, cluster_annot, value='X_pca', metric='euclidean', key=None,
               xlabel=None, ylabel=None, title=None, size='large',
                name_cluster=True, name_cluster_pos='left', 
               palette=None, save=None):
@@ -18,6 +18,10 @@ def silhouette(adata_name, cluster_annot, key=None,
     adata_name: AnnData object
 
     cluster_annot: observational variable corresponding to a cell clustering
+    
+    value: representation used to build the silhouette plot, e.g. X_pca, X_tsne or X_umap (default: 'X_pca')
+    
+    metric: distance metric used for the silhouette computation (default: 'euclidean')
 
     key: specify name of precomputed silhouette scores if not standard
 

diff --git a/episcanpy/preprocessing/_metadata.py b/episcanpy/preprocessing/_metadata.py
@@ -1,5 +1,6 @@
 import anndata as ad
 import pandas as pd
+import pyranges as pr
 
 def load_metadata(adata, metadata_file, path='', separator=';', remove_index_str = None):
     """

diff --git a/episcanpy/tools/_features_selection.py b/episcanpy/tools/_features_selection.py
@@ -38,22 +38,22 @@ def rank_features(adata, groupby, omic=None, use_raw=True, groups='all', referen
     	if copy==False:
     		sc.tl.rank_genes_groups(adata=adata, groupby=groupby, use_raw=use_raw,
                 groups=groups, reference=reference, n_genes=n_features,
-    			rankby_abs=True, key_added=key_added, copy=False, method='t-test', corr_method='benjamini-hochberg')
+                rankby_abs=True, key_added=key_added, copy=False, method='t-test', corr_method='benjamini-hochberg')
     	else:
     		adata2 = sc.tl.rank_genes_groups(adata=adata, groupby=groupby, use_raw=use_raw,
                 groups=groups, reference=reference, n_genes=n_features,
-    			rankby_abs=True, key_added=key_added, copy=True, method='t-test', corr_method='benjamini-hochberg')
+                rankby_abs=True, key_added=key_added, copy=True, method='t-test', corr_method='benjamini-hochberg')
     		return(adata2)
     else:
     	if copy==False:
     		sc.tl.rank_genes_groups(adata=adata, groupby=groupby, use_raw=use_raw,
-                            groups=groups, reference=reference, n_genes=n_features,
-                     		rankby_abs=rankby_abs, key_added=key_added, copy=False, method=method,
-                     		corr_method=corr_method, **kwds)
+    		                      groups=groups, reference=reference, n_genes=n_features,
+    		                      rankby_abs=rankby_abs, key_added=key_added, copy=False, method=method,
+    		                      corr_method=corr_method, **kwds)
     	else:
     		adata2 = sc.tl.rank_genes_groups(adata=adata, groupby=groupby, use_raw=use_raw,
                             groups=groups, reference=reference, n_genes=n_features,
-                     		rankby_abs=rankby_ab, key_added=key_added, copy=True, method=method,
+                            rankby_abs=rankby_abs, key_added=key_added, copy=True, method=method,
                             corr_method=corr_method, **kwds)
     		return(adata2)
 

diff --git a/episcanpy/tools/_find_genes2.py b/episcanpy/tools/_find_genes2.py
@@ -7,6 +7,7 @@
 import matplotlib.axes as pltax
 import pandas as pd
 import pyranges as pr
+import time
 
 def find_genes(adata, gtf_file_name, path='', extension=5000,
     key_added='gene_name', feature_coordinates=None, copy=True):
@@ -33,6 +34,7 @@ def find_genes(adata, gtf_file_name, path='', extension=5000,
     """
 
     # load the gtf file
+    start = time.time()
     gtf_file = []
     with open(gtf_file_name) as f:
         for line in f:

diff --git a/episcanpy/tools/_impute_meth.py b/episcanpy/tools/_impute_meth.py
@@ -1,4 +1,6 @@
 import anndata as ad
+import numpy as np
+import pandas as pd
 
 def readandimputematrix(file_name, min_coverage=1):
     """

diff --git a/episcanpy/tools/_scanpy_fct_features.py b/episcanpy/tools/_scanpy_fct_features.py
@@ -1,10 +1,8 @@
 import scanpy as sc
 
-## everything here is copied from scanpy and adapted
-
+# everything here is copied from scanpy and adapted
 # pca, diffmap, draw_graph, tsne, umap --> I also need it in tools
 # heatmap, violin and matrixplot and heatmap and rank_gene_groups version
-
 def stacked_violinstacked_violin(
     adata: AnnData,
     var_names,

diff --git a/episcanpy/utils.py b/episcanpy/utils.py
@@ -16,6 +16,7 @@
 
 from . import settings
 from . import logging as logg
+from . import plotting as pl
 
 EPS = 1e-15
 

diff --git a/versioneer.py b/versioneer.py
@@ -388,9 +388,11 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False,
     """Call the given command(s)."""
     assert isinstance(commands, list)
     p = None
+    dispcmd = None
     for c in commands:
         try:
             # remember shell=False, so use git.cmd on windows, not just git
+            dispcmd = str([c] + args)
             p = subprocess.Popen([c] + args, cwd=cwd, env=env,
                                  stdout=subprocess.PIPE,
                                  stderr=(subprocess.PIPE if hide_stderr