update 2.1.0 evaluate.cell

z676521995 · Jun 6, 2021 · ec047f5 · ec047f5
1 parent e833ca9
commit ec047f5
Show file tree

Hide file tree

Showing 6 changed files with 150 additions and 8 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -12,5 +12,5 @@ LinkingTo: Rcpp, RcppEigen
 NeedsCompilation: yes
 BugReports: https://github.com/sunduanchen/Scissor/issues
 URL: https://github.com/sunduanchen/Scissor
-RoxygenNote: 7.1.0
+RoxygenNote: 7.1.1
 Encoding: UTF-8
diff --git a/R/Scissor.R b/R/Scissor.R
@@ -39,6 +39,7 @@
 #' Please see Scissor Tutorial for more details.
 #'
 #' @return This function returns a list with the following components:
+#'   \item{para}{A list contains the final model parameters.}
 #'   \item{Coefs}{The regression coefficient for each cell.}
 #'   \item{Scissor_pos}{The cell IDs of Scissor+ cells.}
 #'   \item{Scissor_neg}{The cell IDs of Scissor- cells.}
@@ -80,8 +81,8 @@ Scissor <- function(bulk_dataset, sc_dataset, phenotype, tag = NULL,
         colnames(dataset1) <- colnames(dataset0)
 
         Expression_bulk <- dataset1[,1:ncol(bulk_dataset)]
-        Expression_pbmc <- dataset1[,(ncol(bulk_dataset) + 1):ncol(dataset1)]
-        X <- cor(Expression_bulk, Expression_pbmc)
+        Expression_cell <- dataset1[,(ncol(bulk_dataset) + 1):ncol(dataset1)]
+        X <- cor(Expression_bulk, Expression_cell)
 
         quality_check <- quantile(X)
         print("|**************************************************|")
@@ -121,7 +122,7 @@ Scissor <- function(bulk_dataset, sc_dataset, phenotype, tag = NULL,
                 print("Perform cox regression on the given clinical outcomes:")
             }
         }
-        save(X, Y, network, file = Save_file)
+        save(X, Y, network, Expression_bulk, Expression_cell, file = Save_file)
     }else{
         load(Load_file)
     }
@@ -145,16 +146,15 @@ Scissor <- function(bulk_dataset, sc_dataset, phenotype, tag = NULL,
         print(sprintf("Scissor identified %d Scissor+ cells and %d Scissor- cells.", length(Cell1), length(Cell2)))
         print(sprintf("The percentage of selected cell is: %s%%", formatC(percentage*100, format = 'f', digits = 3)))
 
-        if(percentage < cutoff){
+        if (percentage < cutoff){
             break
         }
         cat("\n")
     }
     print("|**************************************************|")
 
-    #print(colSums(X[,c(Cell1,Cell2)]))
-
-    return(list(Coefs = Coefs,
+    return(list(para = list(alpha = alpha[i], lambda = fit0$lambda.min, family = family),
+                Coefs = Coefs,
                 Scissor_pos = Cell1,
                 Scissor_neg = Cell2))
 }

diff --git a/R/evaluate.cell.R b/R/evaluate.cell.R
@@ -0,0 +1,100 @@
+#' Cell-level evaluations on the Scissor selected cells
+#'
+#' This function perfroms evaluations for each Scissor selected cell.
+#'
+#' This function uses two main strategies to
+#' we used the nonparametric bootstrap strategy to assess the sampling distribution of coefficient or all the Scissor selected ddd cells.#'
+#' @param Load_file File name for loading the preprocessed regression inputs.
+#' @param Scissor_result Output variable from Scissor.
+#' @param FDR_cutoff FDR cutoff in the correlation test for each pair of bulk sample and single-cell. The default value is 0.05.
+#' @param bootstrap_n Bootstrap resampling times.
+#'
+#' @return This function returns a \code{data.frame} with rows are the Scissor selected cells. The columns contain the following attributes:
+#'   \item{Mean correlation}{The mean correlation of a cell with all bulk samples.}
+#'   \item{Correlation > 0}{The percentage of positive correlations of a cell with all bulk samples.}
+#'   \item{Correlation < 0}{The percentage of negative correlations of a cell with all bulk samples.}
+#'   \item{Significant Correlation}{The percentage of significant correlations (FDR < \code{FDR_cutoff}) of a cell with all bulk samples.}
+#'   \item{Coefficient}{The regression coefficient beta calculated in Scissor.}
+#'   \item{Beta 0\%}{The minimum value of bootstrap coefficient.}
+#'   \item{Beta 25\%}{The lower quartile of bootstrap coefficient.}
+#'   \item{Beta 50\%}{The median value of bootstrap coefficient.}
+#'   \item{Beta 75\%}{TThe upper quartile of bootstrap coefficient.}
+#'   \item{Beta 100\%}{The maximum value of bootstrap coefficient.}
+#'   \item{Probability of zero}{The probability of a cell is not selected in bootstrap resamplings.}
+#'
+#' @import progress scales
+#' @export
+evaluate.cell <- function(Load_file, Scissor_result, FDR_cutoff = 0.05, bootstrap_n = 100){
+
+    library(progress)
+    library(scales)
+    load(Load_file)     # X, Y, network, Expression_bulk, Expression_cell
+
+    selected_cell <- c(Scissor_result$Scissor_pos, Scissor_result$Scissor_neg)
+    m <- ncol(Expression_bulk)
+    n <- length(selected_cell)
+    evaluate_summary <- as.data.frame(matrix(0, n, 11, dimnames = list(selected_cell,
+                        c("Mean correlation","Correlation > 0","Correlation < 0","Significant Correlation","Coefficient",
+                          "Beta 0%","Beta 25%","Beta 50%","Beta 75%","Beta 100%","Probability of zero"))))
+
+    # print("|**************************************************|")
+    # print("Performing correlation check for each selected cell")
+    # pb1 <- progress_bar$new(total = m)
+    # cor_test_p <- matrix(0, m, n)
+    # for (i in 1:m){
+    #     pb1$tick()
+    #     Sys.sleep(1 / 100)
+    #     for (j in 1:n){
+    #         cor_test_p[i,j] <- cor.test(Expression_bulk[,i], Expression_cell[,selected_cell[j]])$p.value
+    #     }
+    # }
+    # cor_test_FDR <- matrix(p.adjust(as.numeric(cor_test_p), method = "fdr"), m)
+    # for (j in 1:n){
+    #     evaluate_summary[j,1] <- mean(X[,selected_cell[j]])
+    #     evaluate_summary[j,2] <- percent(sum(X[,selected_cell[j]] > 0)/m)
+    #     evaluate_summary[j,3] <- percent(sum(X[,selected_cell[j]] < 0)/m)
+    #     evaluate_summary[j,4] <- percent(sum(cor_test_FDR[,j] < FDR_cutoff)/m)
+    # }
+    # names(Scissor_result$Coefs) <- colnames(X)
+    # evaluate_summary[,5] <- Scissor_result$Coefs[selected_cell]
+    # cat("Finished!\n")
+
+    print("|**************************************************|")
+    print("Performing nonparametric bootstrap")
+    pb2 <- progress_bar$new(total = bootstrap_n)
+    beta_bootstrap <- matrix(0, bootstrap_n, n, dimnames = list(paste0("Bootstrap_",  1:bootstrap_n), selected_cell))
+    for (i in 1:bootstrap_n){
+        set.seed(i)
+        alpha  <- Scissor_result$para$alpha
+        lambda <- Scissor_result$para$lambda
+        family <- Scissor_result$para$family
+        index  <- sample(m, size = m, replace = TRUE)
+        X2 <- X[index,]
+        if (family == "cox"){
+            Y2 <- Y[index,]
+        }else{
+            Y2 <- Y[index]
+        }
+        bootstrap_fit <- NULL
+        while (is.null(bootstrap_fit$fit)){
+            bootstrap_fit <- APML1(X2, Y2, family = family, penalty = "Net", alpha = alpha, Omega = network, lambda = lambda)
+        }
+        if (family == "binomial"){
+            Coefs_tmp <- as.numeric(bootstrap_fit$Beta[2:(ncol(X2)+1),])
+        }else{
+            Coefs_tmp <- as.numeric(bootstrap_fit$Beta)
+        }
+        names(Coefs_tmp) <- colnames(X2)
+        beta_bootstrap[i,] <- Coefs_tmp[selected_cell]
+        pb2$tick()
+        Sys.sleep(1 / 100)
+        if (i == bootstrap_n) cat("Finished!\n")
+    }
+    for (j in 1:n){
+        tmp <- beta_bootstrap[beta_bootstrap[,j] != 0, j]
+        evaluate_summary[j,6:10] <- fivenum(tmp)
+        evaluate_summary[j,11] <- sum(beta_bootstrap[,j] == 0)/bootstrap_n
+    }
+    return(evaluate_summary)
+}
+
diff --git a/README.md b/README.md
@@ -8,6 +8,7 @@
 </p>
 
 ### News ###
+May, 2021: Scissor version 2.1.0 is updated.
 Feb, 2021: Scissor version 2.0.0 is launched.  
 Jun, 2020: Scissor version 1.0.0 is launched.
 
@@ -32,3 +33,5 @@ Duanchen Sun and Zheng Xia*<br />
 
 ### License ###
 Scissor is licensed under the GNU General Public License v3.0.
+
+Improvements and new features of Scissor will be updated on a regular basis. Please post on the [GitHub discussion page](https://github.com/sunduanchen/Scissor/discussions) with any questions.
diff --git a/man/Scissor.Rd b/man/Scissor.Rd
diff --git a/man/evaluate.cell.Rd b/man/evaluate.cell.Rd