Refactoring the GapFill function:

- Adding missing import - extracting repeated code in a separate function - Change in the message output to show the percentage of gaps filled by iteration
gibbslab · Oct 25, 2020 · a57b651 · a57b651
1 parent 86590e5
commit a57b651
Show file tree

Hide file tree

Showing 3 changed files with 86 additions and 63 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -9,6 +9,7 @@ importFrom("KEGGREST","keggGet")
 importFrom("KEGGREST","keggLink")
 importFrom("KEGGREST","keggList")
 importFrom("minval","metabolites")
+importFrom("minval","orphanMetabolites")
 importFrom("minval","orphanProducts")
 importFrom("minval","orphanReactants")
 importFrom("minval","products")

diff --git a/R/gapFill.R b/R/gapFill.R
@@ -1,81 +1,90 @@
 #' @export gapFill
-#' @importFrom "minval" "orphanReactants" "orphanProducts" "reactants" "products"
+#' @importFrom "minval" "orphanReactants" "orphanProducts" "orphanMetabolites" "reactants" "products"
 #' @author Kelly Botero <[email protected]> - Maintainer: Daniel Camilo Osorio <[email protected]>
 #  Bioinformatics and Systems Biology Lab      | Universidad Nacional de Colombia
 #  Experimental and Computational Biochemistry | Pontificia Universidad Javeriana
 #' @title Find and fill gaps in a metabolic network
 #' @description This function identifies the gaps and fills it from the stoichiometric reactions of a reference metabolic reconstruction using a weighting function.
 #' @seealso \code{additionCost} function documentation.
-#' @param reactionList A set of stoichiometric reaction with the following format: 
-#' 
-#' \code{"H2O[c] + Urea-1-carboxylate[c] <=> 2 CO2[c] + 2 NH3[c]"} 
-#' 
+#' @param reactionList A set of stoichiometric reaction with the following format:
+#'
+#' \code{"H2O[c] + Urea-1-carboxylate[c] <=> 2 CO2[c] + 2 NH3[c]"}
+#'
 #' Where arrows and plus signs are surrounded by a "space character".
 #' It is also expected that stoichiometry coefficients are surrounded by spaces, (nothe the "2" before the CO2[c] or the NH3[c]).
-#' It also expects arrows to be in the form "\code{=>}" or "\code{<=>}". 
+#' It also expects arrows to be in the form "\code{=>}" or "\code{<=>}".
 #' Meaning that arrows like "\code{==>}", "\code{<==>}", "\code{-->}" or "\code{->}" will not be parsed and will lead to errors.
 #' @param reference A set of stoichiometric reaction with the same format of reactionList
 #' @param limit An addition cost value to be used as a limit to select reactions to be added. Is calculated as NumberNewMetabolites/NumerOfMetabolites for each reaction.
 #' @param woCompartment A boolean value \code{TRUE} to define if compartment labels should be removed of the reactionList stoichiometric reactions, \code{FALSE} is used as default.
 #' @param consensus A boolean value \code{TRUE} to define if reactionList and newReactions should be reported as a unique vector or \code{FALSE} if just newReactions should be reported.
-#' 
-#' @examples 
+#' @param nRun The number of iterations to search for new gaps and fill them according to the score, the default is five.
+#'
+#' @examples
 #' \dontrun{
 #' # Downloading stoichiometric reactions
-#' all <- getReference(organism = "all",sep = ";")
-#' eco <- getReference(organism = "eco",sep = ";")
-#' 
+#' all <- getReference(organism = "all", sep = ";")
+#' eco <- getReference(organism = "eco", sep = ";")
+#'
 #' # Filtering reactions
-#' all <- mapReactions(reactionList = all$reaction%in%eco$reaction,
-#'                     referenceData = all,
-#'                     by = "bool",
-#'                     inverse = TRUE)
-#'                     
+#' all <- mapReactions(
+#'   reactionList = all$reaction %in% eco$reaction,
+#'   referenceData = all,
+#'   by = "bool",
+#'   inverse = TRUE
+#' )
+#'
 #' # gapFill
-#' gapFill(reactionList = eco$reaction,
-#'         reference = all$reaction, 
-#'         limit = 0.25,
-#'         woCompartment = TRUE,
-#'         consensus = FALSE)}
-gapFill <- function(reactionList, reference, limit = 0.25, nRun = 5, woCompartment=FALSE, consensus=FALSE){
+#' gapFill(
+#'   reactionList = eco$reaction,
+#'   reference = all$reaction,
+#'   limit = 0.25,
+#'   woCompartment = TRUE,
+#'   consensus = FALSE
+#' )
+#' }
+gapFill <- function(reactionList, reference, limit = 0.25, nRun = 5, woCompartment = FALSE, consensus = FALSE) {
   reference_reactants <- reactants(reference)
   reference_products <- products(reference)
   newR <- NULL
   n <- 0
   while (n < nRun) {
     oR <- orphanReactants(reactionList)
     oP <- orphanProducts(reactionList)
-    repeat({
-      aC <- additionCost(reactionList = reference, reference = reactionList)
-      rA <- reference[aC <= limit]
-      toAdd <- rA[unlist(lapply(reference_reactants[aC <= limit],function(sR){any(sR %in% oR)}))]
-      if(!all(toAdd %in% newR)){
-        newR <- unique(c(newR,toAdd))
-        reactionList <- unique(c(reactionList,newR))
-      } else {
-        break()
-      }
-    })
-    repeat({
-      aC <- additionCost(reactionList = reference, reference = reactionList)
-      rA <- reference[aC <= limit]
-      toAdd <- rA[unlist(lapply(reference_products[aC <= limit],function(sP){any(sP %in% oP)}))]
-      if(!all(toAdd %in% newR)){
-        newR <- unique(c(newR,toAdd))
-        reactionList <- unique(c(reactionList,newR))
-      } else {
-        break()
-      }
-    })
+
+    filledReactions <- fillOrphanMetabolites(reference, reactionList, limit, reference_reactants, oR, newR)
+    newR <- filledReactions$new
+    reactionList <- filledReactions$summary
+    filledReactions <- fillOrphanMetabolites(reference, reactionList, limit, reference_products, oP, newR)
+    newR <- filledReactions$new
+    reactionList <- filledReactions$summary
+
     n <- n + 1
-    message(round(mean(!unique(c(oR,oP)) %in% orphanMetabolites(reactionList)),2))
+    message(round(mean(!unique(c(oR, oP)) %in% orphanMetabolites(reactionList)), 2) * 100, "% gaps filled in the last iteration")
   }
-  if(isTRUE(consensus)){
+  if (isTRUE(consensus)) {
     return(reactionList)
   } else {
     return(newR)
   }
 }
+
+fillOrphanMetabolites <- function(reference, reactionList, limit, reference_metabolites, oM, newR) {
+  repeat({
+    aC <- additionCost(reactionList = reference, reference = reactionList)
+    rA <- reference[aC <= limit]
+    toAdd <- rA[unlist(lapply(reference_metabolites[aC <= limit], function(sR) {
+      any(sR %in% oM)
+    }))]
+    if (any(!toAdd %in% newR)) {
+      newR <- unique(c(newR, toAdd))
+      reactionList <- unique(c(reactionList, newR))
+    } else {
+      break()
+    }
+  })
+  return(list("new" = newR, "summary" = reactionList))
+}
 # hsa <- getReference(organism = "hsa")
 # reactionList <- sample(hsa$reaction,100)
 # reference <- hsa$reaction[!hsa$reaction %in% reactionList]

diff --git a/man/gapFill.Rd b/man/gapFill.Rd