Skip to content

Commit

Permalink
Refactoring the GapFill function:
Browse files Browse the repository at this point in the history
- Adding missing import
- extracting repeated code in a separate function
- Change in the message output to show the percentage of gaps filled by iteration
  • Loading branch information
nmendozam committed Oct 25, 2020
1 parent 86590e5 commit a57b651
Show file tree
Hide file tree
Showing 3 changed files with 86 additions and 63 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ importFrom("KEGGREST","keggGet")
importFrom("KEGGREST","keggLink")
importFrom("KEGGREST","keggList")
importFrom("minval","metabolites")
importFrom("minval","orphanMetabolites")
importFrom("minval","orphanProducts")
importFrom("minval","orphanReactants")
importFrom("minval","products")
Expand Down
101 changes: 55 additions & 46 deletions R/gapFill.R
Original file line number Diff line number Diff line change
@@ -1,81 +1,90 @@
#' @export gapFill
#' @importFrom "minval" "orphanReactants" "orphanProducts" "reactants" "products"
#' @importFrom "minval" "orphanReactants" "orphanProducts" "orphanMetabolites" "reactants" "products"
#' @author Kelly Botero <[email protected]> - Maintainer: Daniel Camilo Osorio <[email protected]>
# Bioinformatics and Systems Biology Lab | Universidad Nacional de Colombia
# Experimental and Computational Biochemistry | Pontificia Universidad Javeriana
#' @title Find and fill gaps in a metabolic network
#' @description This function identifies the gaps and fills it from the stoichiometric reactions of a reference metabolic reconstruction using a weighting function.
#' @seealso \code{additionCost} function documentation.
#' @param reactionList A set of stoichiometric reaction with the following format:
#'
#' \code{"H2O[c] + Urea-1-carboxylate[c] <=> 2 CO2[c] + 2 NH3[c]"}
#'
#' @param reactionList A set of stoichiometric reaction with the following format:
#'
#' \code{"H2O[c] + Urea-1-carboxylate[c] <=> 2 CO2[c] + 2 NH3[c]"}
#'
#' Where arrows and plus signs are surrounded by a "space character".
#' It is also expected that stoichiometry coefficients are surrounded by spaces, (nothe the "2" before the CO2[c] or the NH3[c]).
#' It also expects arrows to be in the form "\code{=>}" or "\code{<=>}".
#' It also expects arrows to be in the form "\code{=>}" or "\code{<=>}".
#' Meaning that arrows like "\code{==>}", "\code{<==>}", "\code{-->}" or "\code{->}" will not be parsed and will lead to errors.
#' @param reference A set of stoichiometric reaction with the same format of reactionList
#' @param limit An addition cost value to be used as a limit to select reactions to be added. Is calculated as NumberNewMetabolites/NumerOfMetabolites for each reaction.
#' @param woCompartment A boolean value \code{TRUE} to define if compartment labels should be removed of the reactionList stoichiometric reactions, \code{FALSE} is used as default.
#' @param consensus A boolean value \code{TRUE} to define if reactionList and newReactions should be reported as a unique vector or \code{FALSE} if just newReactions should be reported.
#'
#' @examples
#' @param nRun The number of iterations to search for new gaps and fill them according to the score, the default is five.
#'
#' @examples
#' \dontrun{
#' # Downloading stoichiometric reactions
#' all <- getReference(organism = "all",sep = ";")
#' eco <- getReference(organism = "eco",sep = ";")
#'
#' all <- getReference(organism = "all", sep = ";")
#' eco <- getReference(organism = "eco", sep = ";")
#'
#' # Filtering reactions
#' all <- mapReactions(reactionList = all$reaction%in%eco$reaction,
#' referenceData = all,
#' by = "bool",
#' inverse = TRUE)
#'
#' all <- mapReactions(
#' reactionList = all$reaction %in% eco$reaction,
#' referenceData = all,
#' by = "bool",
#' inverse = TRUE
#' )
#'
#' # gapFill
#' gapFill(reactionList = eco$reaction,
#' reference = all$reaction,
#' limit = 0.25,
#' woCompartment = TRUE,
#' consensus = FALSE)}
gapFill <- function(reactionList, reference, limit = 0.25, nRun = 5, woCompartment=FALSE, consensus=FALSE){
#' gapFill(
#' reactionList = eco$reaction,
#' reference = all$reaction,
#' limit = 0.25,
#' woCompartment = TRUE,
#' consensus = FALSE
#' )
#' }
gapFill <- function(reactionList, reference, limit = 0.25, nRun = 5, woCompartment = FALSE, consensus = FALSE) {
reference_reactants <- reactants(reference)
reference_products <- products(reference)
newR <- NULL
n <- 0
while (n < nRun) {
oR <- orphanReactants(reactionList)
oP <- orphanProducts(reactionList)
repeat({
aC <- additionCost(reactionList = reference, reference = reactionList)
rA <- reference[aC <= limit]
toAdd <- rA[unlist(lapply(reference_reactants[aC <= limit],function(sR){any(sR %in% oR)}))]
if(!all(toAdd %in% newR)){
newR <- unique(c(newR,toAdd))
reactionList <- unique(c(reactionList,newR))
} else {
break()
}
})
repeat({
aC <- additionCost(reactionList = reference, reference = reactionList)
rA <- reference[aC <= limit]
toAdd <- rA[unlist(lapply(reference_products[aC <= limit],function(sP){any(sP %in% oP)}))]
if(!all(toAdd %in% newR)){
newR <- unique(c(newR,toAdd))
reactionList <- unique(c(reactionList,newR))
} else {
break()
}
})

filledReactions <- fillOrphanMetabolites(reference, reactionList, limit, reference_reactants, oR, newR)
newR <- filledReactions$new
reactionList <- filledReactions$summary
filledReactions <- fillOrphanMetabolites(reference, reactionList, limit, reference_products, oP, newR)
newR <- filledReactions$new
reactionList <- filledReactions$summary

n <- n + 1
message(round(mean(!unique(c(oR,oP)) %in% orphanMetabolites(reactionList)),2))
message(round(mean(!unique(c(oR, oP)) %in% orphanMetabolites(reactionList)), 2) * 100, "% gaps filled in the last iteration")
}
if(isTRUE(consensus)){
if (isTRUE(consensus)) {
return(reactionList)
} else {
return(newR)
}
}

fillOrphanMetabolites <- function(reference, reactionList, limit, reference_metabolites, oM, newR) {
repeat({
aC <- additionCost(reactionList = reference, reference = reactionList)
rA <- reference[aC <= limit]
toAdd <- rA[unlist(lapply(reference_metabolites[aC <= limit], function(sR) {
any(sR %in% oM)
}))]
if (any(!toAdd %in% newR)) {
newR <- unique(c(newR, toAdd))
reactionList <- unique(c(reactionList, newR))
} else {
break()
}
})
return(list("new" = newR, "summary" = reactionList))
}
# hsa <- getReference(organism = "hsa")
# reactionList <- sample(hsa$reaction,100)
# reference <- hsa$reaction[!hsa$reaction %in% reactionList]
Expand Down
47 changes: 30 additions & 17 deletions man/gapFill.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit a57b651

Please sign in to comment.