forked from YuLab-SMU/ggmsa
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgeom_msa.R
190 lines (171 loc) · 7.44 KB
/
geom_msa.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
##' Multiple sequence alignment layer for ggplot2.
##' It creates background tiles with/without sequence characters.
##'
##' @title geom_msa
##' @param data sequence alignment with data frame, generated by tidy_msa().
##' @param font font families, possible values are 'helvetical', 'mono',
##' and 'DroidSansMono', 'TimesNewRoman'. Defaults is 'helvetical'.
##' @param mapping aes mapping
##' If font = NULL, only plot the background tile.
##' @param color A Color scheme. One of 'Clustal', 'Chemistry_AA', 'Shapely_AA',
##' 'Zappo_AA', 'Taylor_AA', 'LETTER','CN6',, 'Chemistry_NT', 'Shapely_NT',
##' 'Zappo_NT', 'Taylor_NT'. Defaults is 'Chemistry_AA'.
##' @param custom_color A data frame with two column called "names" and
##' "color".Customize the color scheme.
##' @param char_width a numeric vector. Specifying the character width in
##' the range of 0 to 1. Defaults is 0.9.
##' @param by_conservation a logical value. The most conserved regions have
##' the brightest colors.
##' @param none_bg a logical value indicating whether background
##' should be displayed. Defaults is FALSE.
##' @param position_highlight A numeric vector of the position that
##' need to be highlighted.
##' @param seq_name a logical value indicating whether sequence names
##' should be displayed. Defaults is 'NULL' which indicates that the
##' sequence name is displayed when 'font = null', but 'font = char'
##' will not be displayed. If 'seq_name = TRUE' the sequence name will
##' be displayed in any case. If 'seq_name = FALSE' the sequence name will not
##' be displayed under any circumstances.
##' @param border a character string. The border color.
##' @param consensus_views a logical value that opening consensus views.
##' @param use_dot a logical value. Displays characters as dots instead of
##' fading their color in the consensus view.
##' @param disagreement a logical value. Displays characters that disagreement
##' to consensus(excludes ambiguous disagreements).
##' @param ignore_gaps a logical value. When selected TRUE,
##' gaps in column are treated as if that row didn't exist.
##' @param ref a character string. Specifying the reference sequence
##' which should be one of input sequences when 'consensus_views' is TRUE.
##' @param position Position adjustment, either as a string, or
##' the result of a call to a position adjustment function,
##' default is 'identity' meaning 'position_identity()'.
##' @param show.legend logical. Should this layer be included in the legends?
##' @param dms logical.
##' @param position_color logical.
##' @param ... additional parameter
##' @return A list
##' @importFrom ggplot2 scale_fill_manual
##' @importFrom utils modifyList
##' @export
##' @examples
##' library(ggplot2)
##'aln <- system.file("extdata", "sample.fasta", package = "ggmsa")
##'tidy_aln <- tidy_msa(aln, start = 150, end = 170)
##'ggplot() + geom_msa(data = tidy_aln, font = NULL) + coord_fixed()
##' @author Guangchuang Yu, Lang Zhou
geom_msa <- function(data, font = "helvetical",
mapping = NULL,
color = "Chemistry_AA",
custom_color = NULL,
char_width = 0.9,
none_bg = FALSE,
by_conservation = FALSE,
position_highlight = NULL,
seq_name = NULL,
border = NULL,
consensus_views = FALSE,
use_dot = FALSE,
disagreement = TRUE,
ignore_gaps = FALSE,
ref = NULL,
position = "identity",
show.legend = FALSE,
dms = FALSE,
position_color = FALSE,
... ) {
data <- msa_data(data,
font = font,
color = color,
custom_color = custom_color,
char_width = char_width,
by_conservation = by_conservation,
consensus_views = consensus_views,
use_dot = use_dot,
disagreement = disagreement,
ignore_gaps = ignore_gaps,
ref = ref)
#legend work
xx <- data[,c("character","color")] %>% unique()
xx <- xx[!is.na(xx$color),]
labs <- lapply(unique(xx$color) %>% seq_along, function(i) {
cols <- unique(xx$color)[i]
dup_char <- xx[xx$color == cols, "character"]
lab <- paste0(dup_char, collapse = ",")
}) %>% do.call("rbind",.) %>% as.vector()
cols <- xx$color %>% unique()
names(cols) <- cols
sacle_tile_cols <- scale_fill_manual(values = cols,
breaks = cols,
labels = labs)
bg_data <- data
#work to ggtreeExtra
if (is.null(mapping)) {
mapping <- aes_(x = ~position, y = ~name, fill = ~I(color))
}
#dms color work
if (dms) {
mapping <- modifyList(mapping, aes_(fill = ~bind_avg))
}
if (position_color) {
mapping <- modifyList(mapping, aes_(fill = ~I(pos_color)))
}
#'seq_name' work
if (!isTRUE(seq_name)) {
if ('y' %in% colnames(data) || isFALSE(seq_name) ) {
y <- as.numeric(bg_data$name)
mapping <- modifyList(mapping, aes_(y = ~y)) #"~y" is seq numbers
}
}
#'position_highlight' work
if (!is.null(position_highlight)) {
none_bg = TRUE
bg_data <- bg_data[bg_data$position %in% position_highlight,]
bg_data$postion <- as.factor(bg_data$position)
mapping <- modifyList(mapping, aes_(x = ~position,
fill = ~color,
width = 1))
}
#'border' work
if(is.null(border)){
ly_bg <- geom_tile(mapping = mapping, data = bg_data, color = 'grey',
inherit.aes = FALSE, position = position,
show.legend = show.legend)
}else{
ly_bg <- geom_tile(mapping = mapping, data = bg_data, color = border,
inherit.aes = FALSE, position = position,
show.legend = show.legend)
}
if (!all(c("yy", "order", "group") %in% colnames(data))) {
if(position_color) {
return(list(ly_bg))
}else{
return(list(ly_bg, sacle_tile_cols))
}
}
if ('y' %in% colnames(data)) {
data$yy = data$yy - as.numeric(data$name) + data$y
}
label_mapping <- aes_(x = ~x, y = ~yy, group = ~group)
# use_dot work
if (consensus_views && !use_dot) {
if(show.legend) {
stop("legends catn't be shown in the consensus view!")
}
label_mapping <- modifyList(label_mapping, aes_(fill = ~I(font_color)))
}
ly_label <- geom_polygon(mapping = label_mapping, data = data,
inherit.aes = FALSE, position = position)
#'none_bg' work
if (none_bg & is.null(position_highlight)) {
return(ly_label)
}
if(consensus_views) {
return(list(ly_bg, ly_label))
}else {
if(position_color){
return(list(ly_bg, ly_label))
}else{
return(list(ly_bg, ly_label, sacle_tile_cols))
}
}
}