forked from fbreitwieser/pavian
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsample-build_sankey_network.R
87 lines (74 loc) · 3.16 KB
/
sample-build_sankey_network.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
build_sankey_network <- function(my_report, taxRanks = c("D","K","P","C","O","F","G","S"), maxn=10,
zoom = F, title = NULL,
...) {
stopifnot("taxRank" %in% colnames(my_report))
if (!any(taxRanks %in% my_report$taxRank)) {
warning("report does not contain any of the taxRanks - skipping it")
return()
}
my_report <- subset(my_report, taxRank %in% taxRanks)
my_report <- plyr::ddply(my_report, "taxRank", function(x) x[utils::tail(order(x$cladeReads,-x$depth), n=maxn), , drop = FALSE])
my_report <- my_report[, c("name","taxLineage","taxonReads", "cladeReads","depth", "taxRank")]
my_report <- my_report[!my_report$name %in% c('-_root'), ]
#my_report$name <- sub("^-_root.", "", my_report$name)
splits <- strsplit(my_report$taxLineage, "\\|")
## for the root nodes, we'll have to add an 'other' link to account for all cladeReads
root_nodes <- sapply(splits[sapply(splits, length) ==2], function(x) x[2])
sel <- sapply(splits, length) >= 3
splits <- splits[sel]
links <- data.frame(do.call(rbind,
lapply(splits, function(x) utils::tail(x[x %in% my_report$name], n=2))), stringsAsFactors = FALSE)
colnames(links) <- c("source","target")
links$value <- my_report[sel,"cladeReads"]
my_taxRanks <- taxRanks[taxRanks %in% my_report$taxRank]
taxRank_to_depth <- stats::setNames(seq_along(my_taxRanks)-1, my_taxRanks)
nodes <- data.frame(name=my_report$name,
depth=taxRank_to_depth[my_report$taxRank],
value=my_report$cladeReads,
stringsAsFactors=FALSE)
for (node_name in root_nodes) {
diff_sum_vs_all <- my_report[my_report$name == node_name, "cladeReads"] - sum(links$value[links$source == node_name])
if (diff_sum_vs_all > 0) {
nname <- paste("other", sub("^._","",node_name))
#nname <- node_name
#links <- rbind(links, data.frame(source=node_name, target=nname, value=diff_sum_vs_all, stringsAsFactors = FALSE))
#nodes <- rbind(nodes, nname)
}
}
names_id = stats::setNames(seq_len(nrow(nodes)) - 1, nodes[,1])
links$source <- names_id[links$source]
links$target <- names_id[links$target]
links <- links[links$source != links$target, ]
nodes$name <- sub("^._","", nodes$name)
links$source_name <- nodes$name[links$source + 1]
if (!is.null(links))
sankeyD3::sankeyNetwork(
Links = links,
Nodes = nodes,
doubleclickTogglesChildren = TRUE,
Source = "source",
Target = "target",
Value = "value",
NodeID = "name",
NodeGroup = "name",
NodePosX = "depth",
NodeValue = "value",
dragY = TRUE,
xAxisDomain = my_taxRanks,
numberFormat = "pavian",
title = title,
nodeWidth = 15,
linkGradient = TRUE,
nodeShadow = TRUE,
nodeCornerRadius = 5,
units = "cladeReads",
fontSize = 12,
iterations = maxn * 100,
align = "none",
highlightChildLinks = TRUE,
orderByPath = TRUE,
scaleNodeBreadthsByString = TRUE,
zoom = zoom,
...
)
}