Skip to content

Commit

Permalink
Add 'predict_topology_from_sequence' and 'predict_topologies_from_seq…
Browse files Browse the repository at this point in the history
…uences'
  • Loading branch information
richelbilderbeek committed Jan 16, 2021
1 parent 148f83e commit bb0bdfb
Show file tree
Hide file tree
Showing 12 changed files with 225 additions and 4 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ Imports:
dplyr,
ggplot2,
plyr,
pureseqtmr,
pureseqtmr (>= 1.3),
purrr,
rappdirs,
readr,
Expand Down
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,9 @@ export(parse_fasta_file)
export(plot_locatome)
export(plot_topology)
export(plot_topology_text)
export(predict_topologies_from_sequences)
export(predict_topology)
export(predict_topology_from_sequence)
export(run_tmhmm)
export(run_tmhmm_on_one_protein_fasta_file)
export(run_tmhmm_on_sequence)
Expand Down
3 changes: 3 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@ Newest versions at top.

### NEW FEATURES

* Added `predict_topology_from_sequence`
and `predict_topologies_from_sequences` to directly predict topologies
from protein sequences
* Added dependency on `pureseqtmr`,
for `pureseqtmr::load_fasta_file_as_tibble`,
which is approximately ten thousand times faster than `parse_fasta_file`
Expand Down
9 changes: 9 additions & 0 deletions R/default_params_doc.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,13 @@
#' that contains one name and one sequence
#' @param protein_sequence a protein sequence
#' @param protein_sequences one or more protein sequences
#' @param temp_fasta_filename filename to temporarily
#' save the protein names and sequences.
#' This file will be deleted once this
#' function has completed successfully
#' @param temp_topology_filename filename to temporarily
#' save the topology. This file will be deleted once this
#' function has completed successfully
#' @param tmhmm_bin_filename path to the TMHMM binary file.
#' Its default path can be obtained by using
#' \link{get_default_tmhmm_bin_path}
Expand Down Expand Up @@ -45,6 +52,8 @@ default_params_doc <- function(
one_protein_fasta_filename,
protein_sequence,
protein_sequences,
temp_fasta_filename,
temp_topology_filename,
tmhmm_bin_filename,
tmhmm_filename,
tmhmm_result,
Expand Down
39 changes: 39 additions & 0 deletions R/predict_topologies_from_sequences.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
#' Run TMHMM directly on one or more protein sequences
#'
#' The protein sequences are saved to a temporary FASTA file,
#' with the names \code{protein_1}, \code{protein_2}, etc.,
#' after which \link{predict_topology} is called on that file.
#' The temporary FASTA file is deleted when this function exits,
#' also when the prediction fails.
#' @inheritParams default_params_doc
#' @return one or more topology sequences,
#' for example \code{c("iiiimmmmooooo", "iiiii")}.
#' If \code{protein_sequences} has no elements,
#' an empty character vector is returned
#' @seealso use \link{mock_predict_topologies_from_sequences}
#' to mock the prediction of protein sequences, as can be useful
#' in testing
#' @examples
#' if (is_tmhmm_installed()) {
#'   protein_sequences <- c(
#'     "FAMILYVWFAMILYVW",
#'     "FAMILYVWFAMILY"
#'   )
#'   predict_topologies_from_sequences(protein_sequences)
#' }
#' @author Richèl J.C. Bilderbeek
#' @export
predict_topologies_from_sequences <- function(# nolint indeed a long function name
  protein_sequences,
  folder_name = get_default_tmhmm_folder(),
  temp_fasta_filename = tempfile(fileext = ".fasta"),
  temp_topology_filename = tempfile(fileext = ".topo")
) {
  # Nothing to predict: skip writing a FASTA file and calling TMHMM
  if (length(protein_sequences) == 0) {
    return(character(0))
  }
  t_sequences <- tibble::tibble(
    name = paste0("protein_", seq_along(protein_sequences)),
    sequence = protein_sequences
  )
  # Register the clean-up before the file is created, so the temporary
  # FASTA file is also removed when saving or predicting fails
  on.exit(
    {
      if (file.exists(temp_fasta_filename)) {
        file.remove(temp_fasta_filename)
      }
    },
    add = TRUE
  )
  pureseqtmr::save_tibble_as_fasta_file(
    t = t_sequences,
    fasta_filename = temp_fasta_filename
  )
  t_topologies <- tmhmm::predict_topology(
    fasta_filename = temp_fasta_filename,
    folder_name = folder_name,
    temp_topology_filename = temp_topology_filename
  )
  t_topologies$topology
}
3 changes: 0 additions & 3 deletions R/predict_topology.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,6 @@
#' @return a \link[tibble]{tibble} with the columns 'name'
#' and 'topology', where the 'name' column hold all the proteins' names,
#' and 'topology' contains all respective topologies.
#' @param temp_topology_filename filename to temporarily
#' save the topology. This file will be deleted when this
#' function completed successfully
#' @examples
#' if (is_tmhmm_installed()) {
#' fasta_filename <- system.file("extdata", "tmhmm.fasta", package = "tmhmm")
Expand Down
30 changes: 30 additions & 0 deletions R/predict_topology_from_sequence.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#' Predict the topology of a single protein sequence using TMHMM
#'
#' This is a convenience wrapper: it passes \code{protein_sequence}
#' as the \code{protein_sequences} argument
#' of \link{predict_topologies_from_sequences},
#' together with all other arguments, and returns its result.
#' @param protein_sequence a protein sequence, with
#'   the amino acids as capitals, for
#'   example \code{MEILCEDNTSLSSIPNSL}
#' @inheritParams default_params_doc
#' @return a topology sequence,
#'   for example \code{"iiiimmmmmoooo"}
#' @examples
#' if (is_tmhmm_installed()) {
#'   protein_sequence <- paste0(
#'     "QEKNWSALLTAVVIILTIAGNILVIMAVSLEKKLQNATNYFLM",
#'     "SLAIADMLLGFLVMPVSMLTILYGYRWP"
#'   )
#'   predict_topology_from_sequence(protein_sequence)
#' }
#' @author Richèl J.C. Bilderbeek
#' @export
predict_topology_from_sequence <- function(
  protein_sequence,
  folder_name = get_default_tmhmm_folder(),
  temp_fasta_filename = tempfile(fileext = ".fasta"),
  temp_topology_filename = tempfile(fileext = ".topo")
) {
  # Delegate to the vectorised function, forwarding every argument by name
  topology <- tmhmm::predict_topologies_from_sequences(
    temp_topology_filename = temp_topology_filename,
    temp_fasta_filename = temp_fasta_filename,
    folder_name = folder_name,
    protein_sequences = protein_sequence
  )
  topology
}
11 changes: 11 additions & 0 deletions man/default_params_doc.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

54 changes: 54 additions & 0 deletions man/predict_topologies_from_sequences.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

51 changes: 51 additions & 0 deletions man/predict_topology_from_sequence.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 12 additions & 0 deletions tests/testthat/test-predict_topologies_from_sequences.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
test_that("use", {
  # Report an explicit skip, instead of ending the test without any
  # expectation, when TMHMM is absent
  skip_if_not(is_tmhmm_installed(), "TMHMM is not installed")
  protein_sequences <- c(
    "FAMILYVWFAMILYVW",
    "FAMILYVWFAMILY"
  )
  topologies <- predict_topologies_from_sequences(
    protein_sequences = protein_sequences
  )
  # One topology per sequence, each as long as its sequence
  expect_length(topologies, length(protein_sequences))
  expect_equal(nchar(topologies), nchar(protein_sequences))
})
13 changes: 13 additions & 0 deletions tests/testthat/test-predict_topology_from_sequence.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
test_that("use", {
  # Report an explicit skip, instead of ending the test without any
  # expectation, when TMHMM is absent
  skip_if_not(is_tmhmm_installed(), "TMHMM is not installed")
  protein_sequence <-
    "QEKNWSALLTAVVIILTIAGNILVIMAVSLEKKLQNATNYFLMSLAIADMLLGFLVMPVSMLTILYGYRWP"
  topology <- predict_topology_from_sequence(
    protein_sequence = protein_sequence
  )
  expect_equal(
    topology,
    "iiiiiiMMMMMMMMMMMMMMMMMMMMMMMooooooooooooooMMMMMMMMMMMMMMMMMMMMMMMiiiii"
  )
  # The topology has exactly as many characters as the sequence
  expect_equal(nchar(topology), nchar(protein_sequence))
})

0 comments on commit bb0bdfb

Please sign in to comment.