Skip to content

Commit

Permalink
latest
Browse files Browse the repository at this point in the history
  • Loading branch information
eleonore-schneeg committed Apr 11, 2023
1 parent 49b0b3d commit f94ad3b
Show file tree
Hide file tree
Showing 29 changed files with 460 additions and 9 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/r_package_dev_ele_new.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,6 @@ jobs:

- name: push service
run: |
docker push eleonoreschneeg/omix:${{ env.packageVersion }}
docker push eleonoreschneeg/omix:latest
docker push eleonoreschneeg/omix:dev
docker push eleonore-schneeg/omix:${{ env.packageVersion }}
docker push eleonore-schneeg/omix:latest
docker push eleonore-schneeg/omix:dev
2 changes: 2 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,8 @@ statmod \
systemfonts \
viridis \
visNetwork \
intNMF \
ActivePathways \
&& rm -rf /tmp/downloaded_packages

## Install Bioconductor packages
Expand Down
12 changes: 12 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

export(DE_multiomics)
export(GSEA)
export(ask_chatgpt)
export(batch_correction_protein)
export(cell_type_enrichment)
export(clustering_DE_analysis)
Expand All @@ -16,10 +17,12 @@ export(filter_protein)
export(format_res_deseq)
export(format_res_limma)
export(generate_multiassay)
export(getClustNum)
export(integrate_with_DIABLO)
export(integrate_with_MBPLS)
export(integrate_with_MEIFESTO)
export(integrate_with_MOFA)
export(integrate_with_iCluster)
export(integrate_with_sMBPLS)
export(integrative_results_supervised)
export(integrative_results_unsupervised)
Expand All @@ -31,6 +34,7 @@ export(multiomics_network_cluster)
export(multiomics_network_matrix)
export(pathway_analysis_enrichr)
export(plot_OpenTarget)
export(plot_optimal_cluster)
export(process_protein)
export(process_rna)
export(processed_proteomics)
Expand All @@ -45,6 +49,7 @@ export(volcano_interactive)
export(volcano_interactive_comparison)
export(volcano_plot_deseq)
export(volcano_plot_limma)
import(SNFtool)
import(ggplot2)
import(igraph)
importFrom(AnnotationDbi,mapIds)
Expand All @@ -55,6 +60,7 @@ importFrom(DESeq2,results)
importFrom(EWCE,bootstrap_enrichment_test)
importFrom(EWCE,ewce_plot)
importFrom(EnsDb.Hsapiens.v86,EnsDb.Hsapiens.v86)
importFrom(IntNMF,nmf.opt.k)
importFrom(MOFA2,correlate_factors_with_covariates)
importFrom(MOFA2,create_mofa)
importFrom(MOFA2,get_default_data_options)
Expand Down Expand Up @@ -112,11 +118,14 @@ importFrom(enrichR,enrichr)
importFrom(enrichplot,pairwise_termsim)
importFrom(fgsea,collapsePathways)
importFrom(fgsea,fgsea)
importFrom(ggplot2,alpha)
importFrom(ggplot2,ggsave)
importFrom(ggplot2,ggtitle)
importFrom(ggpubr,ggscatter)
importFrom(ggrepel,geom_text_repel)
importFrom(httr,POST)
importFrom(httr,add_headers)
importFrom(iClusterPlus,iClusterBayes)
importFrom(jsonlite,fromJSON)
importFrom(limma,contrasts.fit)
importFrom(limma,eBayes)
Expand All @@ -132,6 +141,8 @@ importFrom(mixOmics,block.spls)
importFrom(mixOmics,block.splsda)
importFrom(mixOmics,selectVar)
importFrom(mixOmics,tune.block.splsda)
importFrom(mogsa,mbpca)
importFrom(mogsa,moGap)
importFrom(msigdbr,msigdbr)
importFrom(org.Hs.eg.db,org.Hs.eg.db)
importFrom(org.Mm.eg.db,org.Mm.eg.db)
Expand All @@ -154,6 +165,7 @@ importFrom(stats,as.formula)
importFrom(stats,model.matrix)
importFrom(stats,prcomp)
importFrom(stats,reorder)
importFrom(stringr,str_trim)
importFrom(stringr,str_wrap)
importFrom(tibble,enframe)
importFrom(tidyr,pivot_wider)
Expand Down
27 changes: 27 additions & 0 deletions R/chatgpt.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#' Calls chatGPT with a specific prompt
#'
#' @param prompt Character vector
#' @param api_key Personal API key
#'
#' @return ChatGPT answer
#' @export
#' @importFrom httr add_headers
#' @importFrom stringr str_trim

ask_chatgpt <- function(prompt,
api_key="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") {
response <- POST(
url = "https://api.openai.com/v1/chat/completions",
httr::add_headers(Authorization = paste("Bearer", api_key)),
content_type_json(),
encode = "json",
body = list(
model = "gpt-3.5-turbo",
messages = list(list(
role = "user",
content = prompt
))
)
)
stringr::str_trim(content(response)$choices[[1]]$message$content)
}
39 changes: 39 additions & 0 deletions R/integration_visualisations.R
Original file line number Diff line number Diff line change
Expand Up @@ -439,3 +439,42 @@ community_graph <- function(igraph,

return(res)
}


#' Plot optimal number of clusters from `getClustNum()`
#'
#' @param optk1 Cluster Prediction Index
#' @param optk2 Gap statistics
#'
#' @return plot
#' @export
#' @family Plotting
#' @importFrom ggplot2 alpha

plot_optimal_cluster <- function(optk1,
optk2,
try.N.clust) {
par(bty = "o", mgp = c(1.9, .33, 0), mar = c(3.1, 3.1, 2.1, 3.1) + .1, las = 1, tcl = -.25)
plot(NULL, NULL,
xlim = c(min(try.N.clust), max(try.N.clust)),
ylim = c(0, 1),
xlab = "Number of Multi-Omics Clusters", ylab = ""
)
rect(par("usr")[1], par("usr")[3], par("usr")[2], par("usr")[4], col = "#EAE9E9", border = FALSE)
grid(col = "white", lty = 1, lwd = 1.5)
points(try.N.clust, apply(optk1, 1, mean), pch = 19, col = ggplot2::alpha("#224A8D"), cex = 1.5)
lines(try.N.clust, apply(optk1, 1, mean), col = "#224A8D", lwd = 2, lty = 4)
mtext("Cluster Prediction Index", side = 2, line = 2, cex = 1.5, col = "#224A8D", las = 3)

par(new = TRUE, xpd = FALSE)
plot(NULL, NULL,
xlim = c(min(try.N.clust), max(try.N.clust)),
ylim = c(0, 1),
xlab = "", ylab = "", xaxt = "n", yaxt = "n"
)
points(try.N.clust, optk2$gap, pch = 19, col = ggplot2::alpha("#E51718", 0.8), cex = 1.5)
lines(try.N.clust, optk2$gap, col = "#E51718", lwd = 2, lty = 4)
axis(side = 4, at = seq(0, 1, 0.2), labels = c("0.0", "0.2", "0.4", "0.6", "0.8", "1.0"))
mtext("Gap-statistics", side = 4, line = 2, las = 3, cex = 1.5, col = "#E51718")
}

185 changes: 180 additions & 5 deletions R/integrative_models.R
Original file line number Diff line number Diff line change
Expand Up @@ -117,9 +117,10 @@ integrate_with_sMBPLS <- function(multimodal_omics,
Y,
design,
ncomp,
list.keepX = list(mRNA = c(50),
proteins = c(50))
) {
list.keepX = list(
mRNA = c(50),
proteins = c(50)
)) {
multimodal_omics <- lapply(multimodal_omics, t)
X <- list(
mRNA = multimodal_omics[[1]],
Expand Down Expand Up @@ -324,8 +325,7 @@ integrate_with_MEIFESTO <- function(multimodal_omics,
num_factors = 5,
scale_views = TRUE,
metadata,
time = "pseudotime"
) {
time = "pseudotime") {
# python_path <- Sys.which("python")
# reticulate::use_python(python_path, required = NULL)

Expand Down Expand Up @@ -373,3 +373,178 @@ integrate_with_MEIFESTO <- function(multimodal_omics,
model = model
))
}

#' @name getClustNum
#' @title Get estimation of optimal clustering number
#' @description This function provides two measurements (i.e., clustering prediction index [CPI] and Gap-statistics) and aims to search the optimal number for multi-omics integrative clustering. In short, the peaks reach by the red (CPI) and blue (Gap-statistics) lines should be referred to determine `N.clust`.
#' @param data List of matrices.
#' @param is.binary A logicial vector to indicate if the subdata is binary matrix of 0 and 1 such as mutation.
#' @param try.N.clust A integer vector to indicate possible choices of number of clusters.
#' @param center A logical value to indicate if the variables should be centered. TRUE by default.
#' @param scale A logical value to indicate if the variables should be scaled. FALSE by default.
#' @export
#' @return A figure that helps to choose the optimal clustering number (argument of `N.clust`) for `get%algorithm_name%()` or `getMOIC()`, and a list contains the following components:
#'
#' \code{CPI} possible cluster number identified by clustering prediction index
#'
#' \code{Gapk} possible cluster number identified by Gap-statistics
#'
#' @family Multi-omic integration
#' @importFrom IntNMF nmf.opt.k
#' @importFrom mogsa mbpca moGap
#' @import SNFtool
#' @importFrom ggplot2 alpha
#' @importFrom dplyr %>%
#' @examples # There is no example and please refer to vignette.
#' @references Chalise P, Fridley BL (2017). Integrative clustering of multi-level omic data based on non-negative matrix factorization algorithm. PLoS One, 12(5):e0176278.
#'
#' Tibshirani, R., Walther, G., Hastie, T. (2001). Estimating the number of data clusters via the Gap statistic. J R Stat Soc Series B Stat Methodol, 63(2):411-423.
#'
getClustNum <- function(data = NULL,
is.binary = rep(FALSE, length(data)),
try.N.clust = 2:8,
center = TRUE,
scale = TRUE) {
# check data
n_dat <- length(data)
if (n_dat > 2) {
stop("current version of Omix can support up to 2 datasets.")
}
if (n_dat < 2) {
stop("current version of Omix needs at least 2 omics data.")
}

data.backup <- data # save a backup

#--------------------------------------------#
# Cluster Prediction Index (CPI) from IntNMF #
# remove features that made of categories not equal to 2 otherwise Error in svd(X) : a dimension is zero
if (!all(!is.binary)) {
bindex <- which(is.binary == TRUE)
for (i in bindex) {
a <- which(rowSums(data[[i]]) == 0)
b <- which(rowSums(data[[i]]) == ncol(data[[i]]))
if (length(a) > 0) {
data[[i]] <- data[[i]][which(rowSums(data[[i]]) != 0), ] # remove all zero
}

if (length(b) > 0) {
data[[i]] <- data[[i]][which(rowSums(data[[i]]) != ncol(data[[i]])), ] # remove all one
}

if (length(a) + length(b) > 0) {
message(paste0("--", names(data)[i], ": a total of ", length(a) + length(b), " features were removed due to the categories were not equal to 2!"))
}
}
}

# In order to make the input data fit non-negativity constraint of intNMF,
# the values of the data were shifted to positive direction by adding absolute value of the smallest negative number.
# Further, each data was rescaled by dividing by maximum value of the data to make the magnitudes comparable (between 0 and 1) across the several datasets.
dat <- lapply(data, function(dd) {
if (!all(dd >= 0)) dd <- pmax(dd + abs(min(dd)), 0) + .Machine$double.eps # .Machine$double.eps as The smallest positive floating-point number x
dd <- dd / max(dd)
return(dd %>% as.matrix())
})

# dat <- lapply(dat, t)
dat <- lapply(dat, function(x) t(x) + .Machine$double.eps)

message("calculating Cluster Prediction Index...")
optk1 <- IntNMF::nmf.opt.k(
dat = dat,
n.runs = 5,
n.fold = 5,
k.range = try.N.clust,
st.count = 10,
maxiter = 100,
make.plot = FALSE
)
optk1 <- as.data.frame(optk1)

#-------------------------------#
# Gap-statistics from MoCluster #
message("calculating Gap-statistics...")
moas <- data.backup %>% mogsa::mbpca(
ncomp = 2,
k = "all",
method = "globalScore",
center = center,
scale = scale,
moa = TRUE,
svd.solver = "fast",
maxiter = 1000,
verbose = FALSE
)
gap <- mogsa::moGap(moas, K.max = max(try.N.clust), cluster = "hclust", plot = FALSE)
optk2 <- as.data.frame(gap$Tab)[-1, ] # remove k=1


N.clust <- as.numeric(which.max(apply(optk1, 1, mean) + optk2$gap)) + 1
if (length(N.clust) == 0) {
message("--fail to define the optimal cluster number!")
N.clust <- "null"
}


if (N.clust > 1) {
message(paste0("--the imputed optimal cluster number is ", N.clust, " arbitrarily, but it would be better referring to other priori knowledge."))
}
# return(list(N.clust = N.clust, CPI = optk1, Gapk = optk2, Eigen = optk3))
return(list(N.clust = N.clust, CPI = optk1, Gapk = optk2))
}

#' Vertical integration with iCluster
#'
#' @param multimodal_omics multimodal_object generated by `.get_multimodal_object`
#' @param try.N.clust Range of number of potential clusters in the optimisation step
#' @export
#'
#' @return a list object with `multimodal_object` and `model` slots
#'
#' @family Multi-omic integration
#' @importFrom iClusterPlus iClusterBayes

integrate_with_iCluster <- function(multimodal_omics,
try.N.clust = 2:6) {
cli::cli_alert_success("Optimising the number of cluster (this make take a while")

args <- list(
data = multimodal_omics,
is.binary = c(F, F),
try.N.clust = try.N.clust,
center = TRUE,
scale = TRUE)

optk.i <- do.call(getClustNum, args)

optimal_n <- optk.i$N.clust

m1 <- t(multimodal_omics[[1]])
m2 <- t(multimodal_omics[[2]])

cli::cli_alert_success("Integration in progress (this make take a while)")
model <- iClusterPlus::iClusterBayes(
dt1 = m1,
dt2 = m2,
dt3 = NULL, dt4 = NULL, dt5 = NULL, dt6 = NULL,
type = c("gaussian", "gaussian"),
K = optimal_n - 1,
n.burnin = 1000,
n.draw = 1200,
prior.gamma = rep(0.1, 6),
sdev = 0.5,
beta.var.scale = 1,
thin = 1,
pp.cutoff = 0.5
)
return(list(
multimodal_object = multimodal_omics,
model = model,
tuning = list(
optk.i = optk.i,
try.N.clust = try.N.clust
)
))
}

22 changes: 22 additions & 0 deletions man/ask_chatgpt.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit f94ad3b

Please sign in to comment.