-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathnetwork2.R
403 lines (335 loc) · 17.1 KB
/
network2.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
####基于R语言的SparCC分析
rm(list=ls())
#install.packages("robCompositions")
#library(robCompositions)
library(vegan)
library(igraph)
library(qgraph)
library(RColorBrewer)
library(readxl)
library(ggraph)
library(graphlayouts)
library(reshape)
#library(devtools)
#install_github("kasperwelbers/semnet")
library(semnet)
#install.packages('devtools')
#devtools::install_github('zdk123/SpiecEasi')
#加载 SpiecEasi 包
library(SpiecEasi)
#temp_nm1 <- "../otu/ileum/"
#temp_nm2 <- "../otu/cecum/"
#temp_nm3 <- "../otu/fecal/"
#temp_nm4 <- "cecum" # 不同部位时该参数需要改动
# load working datasheet
options (stringsAsFactors = FALSE)
#work_df <- read.csv("Ileum_OTU.csv",header = T,row.names = 1,check.names = F)
a <- read.csv("MSMS/msms_Intensity2_t.csv",
header = T,row.names = 1,check.names = F)
#overlap_index <- intersect(intersect(rownames(a), rownames(b)), rownames(c))
#meta_df <- b #a,b,c三个数据框根据所需要的肠道部位来改动
#otu <- meta_df
# centered log ratio transformation
# otu.clr <- cenLR(otu, exp(10))
# otu <- decostand(otu,"log")
#otu <- log10(otu)
# load library
#sub_otu <- read.delim("clipboard", header = FALSE) # micro_asso_metabo_overlap_f_g_s_otu.xlsx,这里就是根据要分析的代谢物来选择,比如在盲肠部位与代谢物相关的Otus是那些,拷贝过来
#keep <- intersect(colnames(otu), sub_otu$V1) # 和所选择的相应部位总Otus取交集
#C_H.otu <- otu[,keep] # 留下有交集的那些Otus,及其丰度表
# sparcc.otu<-sparcc(otu)##Sparcc计算
sparcc.metabo <- sparcc(a, iter = 20, inner_iter = 10, th = 0.1)
cor.metabo <- sparcc.metabo$Cor
#diag(cor.otu) <- 0 #将相关矩阵中对角线中的值(代表了自相关)转为 0
cor.metabo <- as.data.frame(cor.metabo)
rownames(cor.metabo) <- colnames(a)
colnames(cor.metabo) <- colnames(a)
#dir.create("network")
temp_nm1 <- "./network/"
write.table(cor.metabo, paste(temp_nm1, 'sparcc0.txt', sep = ""), sep = '\t', col.names = NA, quote = FALSE)
#通过 100 次自举抽样获取随机矩阵
set.seed(123)
n = 100
for (i in 1:n) {
amgut1.filt.boot <- sample(a, replace = TRUE) #bootstrap
amgut1.filt.sparcc_boot <- sparcc(amgut1.filt.boot, iter = 20, inner_iter = 10, th = 0.1) #sparcc 参数设置和上文保持一致
sparcc_boot <- amgut1.filt.sparcc_boot$Cor
colnames(sparcc_boot) <- colnames(amgut1.filt.boot)
rownames(sparcc_boot) <- colnames(amgut1.filt.boot)
write.table(sparcc_boot, paste(temp_nm1,'sparcc_boot', i, '.txt', sep = ''), sep = '\t', col.names = NA, quote = FALSE) #输出随机值的稀疏相关性矩阵
}
#基于上述观测值的稀疏相关性矩阵以及 100 次 bootstrap 的结果,获取稀疏相关性的伪 p 值
p <- cor.metabo
p[p!=0] <- 0
for (i in 1:n) {
p_boot <- read.delim(paste(temp_nm1,'sparcc_boot', i, '.txt', sep = ''), sep = '\t', row.names = 1)
p[abs(p_boot)>=abs(cor.metabo)] <- p[abs(p_boot)>=abs(cor.metabo)] + 1
}
p <- p / n
write.table(p, paste(temp_nm1,'pvals.two_sided.txt', sep = ""), sep = '\t', col.names = NA, quote = FALSE)
# 合并相关矩阵和伪p值矩阵,构建网络,以及可视化等
#观测值的相关矩阵
cor_sparcc <- read.delim(paste(temp_nm1, 'sparcc0.txt', sep = ""), row.names = 1, sep = '\t', check.names = FALSE)
#伪 p 值矩阵
pvals <- read.delim(paste(temp_nm1, 'pvals.two_sided.txt', sep = ""), row.names = 1, sep = '\t', check.names = FALSE)
#保留 |相关性|>0.3 且 p<0.05的值(不满足条件的相关性均赋值为 0)
cor_sparcc[abs(cor_sparcc)<=0.3 | pvals>=0.05] <- 0
pvals[abs(cor_sparcc)<=0.3 | pvals>=0.05] <- 1
cor_sparcc2 <- data.frame(row = rownames(cor_sparcc), cor_sparcc, check.names = F)
pvals2 <- data.frame(row = rownames(pvals), pvals, check.names = F)
nbar.m <- melt(cor_sparcc2)
nbap.m <- melt(pvals2)
# 将nbap.m的P值列加到nbar.m中去
nbar.m2 <- cbind(nbar.m, nbap.m$value)
names(nbar.m2)[3:4] <- c("Coef", "Pvalue")
# 删除相关系数为0的行
nbar.m2 <- nbar.m2[-which(nbar.m2$Coef == 0), ]
nbar.m2 <- nbar.m2[which(nbar.m2$Pvalue != 1), ]
dim(nbar.m2)
write.csv(nbar.m2, "association_metabolites20220906.csv")
#将相关矩阵中对角线中的值(代表了自相关)转为 0
diag(cor_sparcc) <- 0
#输出邻接矩阵类型的网络文件
write.table(cor_sparcc, paste(temp_nm1, 'neetwork.adj.txt', sep = ""), col.names = NA, sep = '\t', quote = FALSE)
## 开始用igraph构建网络
occor = cor_sparcc
# 确定物种间存在相互作用关系的阈值,将相关性R矩阵内不符合的数据转换为0
# 粪便将相关系数的阈值设置为0.3
#occor[abs(occor) < 0.3] = 0
# 构建igraph对象
igraph = graph_from_adjacency_matrix(as.matrix(occor),mode="undirected",weighted=TRUE,diag=FALSE)
igraph
# remove isolated nodes,即去掉和所有otu均无相关性的otu 可省略,前期矩阵已处理过
bad.vs = V(igraph)[degree(igraph) == 0]
igraph = delete.vertices(igraph, bad.vs)
igraph
# 将igraph weight属性赋值到igraph.weight
igraph.weight = E(igraph)$weight
# 做图前去掉igraph的weight权重,因为做图时某些layout会受到其影响
E(igraph)$weight = NA
# 简单出图
# 设定随机种子数,后续出图都从同一随机种子数出发,保证前后出图形状相对应
set.seed(123)
plot(igraph,main="Co-occurrence network",vertex.frame.color=NA,edge.width=1,
vertex.size=5,edge.lty=1,edge.curved=FALSE,margin=c(0,0,0,0))
#2.按相关类型设置边颜色
# 如果构建网络时,weighted=NULL,此步骤不能统计
sum(igraph.weight>0)# number of postive correlation
sum(igraph.weight<0)# number of negative correlation
# set edge color, postive correlation 设定为red, negative correlation设定为blue
E.color = igraph.weight
E.color = ifelse(E.color>0, "#D65DB1",ifelse(E.color<0, "#2C73D2","grey"))
E(igraph)$color = as.character(E.color)
# 改变edge颜色后出图
set.seed(123)
plot(igraph,main="Co-occurrence network",vertex.frame.color=NA,vertex.label=NA,edge.width=1,
vertex.size=5,edge.lty=1,edge.curved=FALSE,margin=c(0,0,0,0))
#3.按相关性设置边宽度
# 可以设定edge的宽 度set edge width,例如将相关系数与edge width关联
#E(igraph)$width = abs(igraph.weight)*20
E(igraph)$Coef. = abs(igraph.weight)
metabo_pro <- read.csv("Table_1.csv", header = T, row.names = 1, check.names = F)
igraph.size = metabo_pro[V(igraph)$name,9] # 筛选对应OTU属性
#colnames(igraph.size) <- "abundance"
#igraph.size1 = log(igraph.size*100) # 原始数据是什么,为什么*100再取e对数
V(igraph)$VIP = igraph.size
#' 2.按模块着色
# 模块性 modularity
fc = cluster_fast_greedy(igraph,weights =NULL)# cluster_walktrap cluster_edge_betweenness, cluster_fast_greedy, cluster_spinglass
modularity = modularity(igraph,membership(fc))
# 按照模块为节点配色
comps = membership(fc)
# 每个模块otus数目的多少
module_size = sizes(fc)
module_size
#colbar = rainbow(max(comps))
#colbar = c("gold","deeppink","deepskyblue","yellow","brown","pink")
#colbar = c("gold","deeppink","deepskyblue", "yellow","brown","pink","indianred1")
# display.brewer.all(type = "qual") 列出所有调色板,供选择
colbar = brewer.pal(7,"Dark2")
modbar = c(paste("module",seq(1,7,1),sep = " "))
V(igraph)$color = colbar[comps]
V(igraph)$modulname = modbar[comps]
modulcolor = colbar[as.numeric(as.factor(V(igraph)$modulname))]
#' 3.按照组间上调和下调进行配色
V(igraph)$Group = metabo_pro[V(igraph)$name,10]
colbar2 = c("#E16663", "#90CBD3")
my_color = colbar2[as.numeric(as.factor(V(igraph)$Group))]
#' 4.把每个
#'
#'
#'
#'
#'
# 用ggplot2画网络图参考来源:https://mr.schochastics.net/material/netvizr/
p3 <- ggraph(igraph, layout = "auto") +
geom_edge_link0(aes(edge_width = Coef.), edge_colour = E(igraph)$color) +
geom_node_point(aes(fill = Group, size = VIP, shape = Group)) +
#geom_node_point(aes(fill = modulname, size = size),shape = 21) +
geom_node_text(aes(label = name, size = VIP), family = "serif",
repel = TRUE) +
scale_shape_manual(values = c(21, 22)) +
scale_fill_manual(values = my_color) +
scale_edge_width(range = c(1, 2)) +
scale_size(range = c(2, 6)) +
theme_graph() +
theme(legend.position = "right",
legend.text = element_text(size=12),
legend.title = element_text(size = 15))
ggsave("metabo_network20220511.png", width = 16, height = 8, dpi = 300)
p4 <- ggraph(igraph, layout = "stress") +
geom_edge_link0(aes(edge_width = Coef.), edge_colour = E(igraph)$color) +
geom_node_point(aes(fill = modulname, size = VIP), shape = 21) +
#geom_node_point(aes(fill = modulname, size = size),shape = 21) +
geom_node_text(aes(label = name, size = VIP), family = "serif",
repel = TRUE) +
#scale_shape_manual(values = c(21, 22)) +
#scale_fill_manual(values = modulcolor) +
scale_edge_width(range = c(1, 2)) +
scale_size(range = c(2, 6)) +
theme_graph() +
theme(legend.position = "right",
legend.text = element_text(size=12),
legend.title = element_text(size = 15))
ggsave("metabo_network_module20220511.png", width = 16, height = 8, dpi = 300)
library(cowplot)
p5 <- plot_grid(p3,p4,labels = c("A", "B"), nrow = 2)
ggsave("metabo_network_merge0906.tiff",plot = p5, width = 12, height = 12, dpi = 300)
# note: we choose the hub Otu(s) based on at least two parameters from 'degree', 'closeness'
# or 'betweenness'.
# degree
mostcloseness_igraph <- names(closeness(igraph))[which(as.vector(closeness(igraph)) == max(closeness(igraph)))]
mostcloseness_igraph
mostdegree_igraph <- names(degree(igraph))[which(as.vector(degree(igraph)) == max(degree(igraph)))]
mostdegree_igraph
mostbetweenness_igraph <- names(betweenness(igraph))[which(as.vector(betweenness(igraph)) == max(betweenness(igraph)))]
mostbetweenness_igraph
# obtain the hub Otu
most_hub <- intersect(intersect(mostcloseness_igraph, mostdegree_igraph),
mostbetweenness_igraph) # 有时候三个条件不能同时满足,那就看满足任意两个条件的核心Otu是哪个
most_hub
hub_score(igraph)$vector
# 获得hub metabolites的名字
names(hub_score(igraph)$vector)[as.vector(hub_score(igraph)$vector) == 1.0]
# D-Galactose
as.vector(comps[names(comps) == "D-Galactose"]) # 快速确定核心Otu是哪个模块的
#radian.rescale <- function(x, start=0, direction=1) {
# c.rotate <- function(x) (x + start) %% (2 * pi) * direction
# c.rotate(ggplot2::rescale(x, c(0, 2 * pi), range(x)))
#}
#n <- length(V(igraph)$name)
#lab.locs <- radian.rescale(x=n, direction=1, start=0)
tiff("metabolite_network_module.tiff",width = 4000,height = 4000, res = 300)
#set.seed(100)
#par(mar=c(2,2,2,4))
e <- get.edgelist(igraph, names = FALSE)
#l <- qgraph.layout.fruchtermanreingold(e,vcount=vcount(subgraph_sub)) #https://stackoverflow.com/questions/39290909/igraph-resolving-tight-overlapping-nodes
#l <- qgraph.layout.fruchtermanreingold(e,vcount=vcount(subgraph_sub))
ll <- qgraph.layout.fruchtermanreingold(e,vcount=vcount(igraph),
area=8*(vcount(igraph)^2),repulse.rad=(vcount(igraph)^3.2))
#l <- layout_with_fr(igraph)
#ll <- norm_coords(ll, ymin = -1,ymax = 1,xmin = -1,xmax = 1)
plot(igraph,
rescale=TRUE,
layout=ll,
vertex.label.dist=0.1,
#vertex.label=NA,
#vertex.color = my_color,
vertex.label.color="black",
vertex.label.degree=0,
edge.curved=FALSE,
vertex.shape=ifelse(V(igraph)$name %in% "D-Galactose", "square","circle"),
vertex.label.cex=ifelse(V(igraph)$name %in% "D-Galactose", 1.6, 1.2),
vertex.label.font=1,
edge.width=E(igraph)$width,
vertex.frame.color=NA,
margin=c(0,0,0,0))
title("Co-occurrence network", cex.main=1.8)
library(stringr)
text(0.5, 1.1, labels = "Hub metabolites: D-Galactose", cex=1.5)
legend("topleft",legend = levels(as.factor(unique(V(igraph)$modulname))),
fill=c("#8DD3C7","#FFFFB3","#BEBADA","#FB8072","#80B1D3","#FDB462"),bty = "n",cex=1.4,title = "Module names")
#legend("topleft",legend = levels(as.factor(unique(V(igraph)$updown))),
# col = colbar2,bty = "n", pch = 20, cex=1.4, text.col = colbar2)
#legend("topleft",legend = paste("module", seq(1,11,1), sep = ""),
# fill=c("#8DD3C7","#FFFFB3","#BEBADA","#FB8072","#80B1D3",
# "#FDB462","#B3DE69","#FCCDE5","#D9D9D9","#BC80BD","#CCEBC5"),bty = "n",cex=1.2,title = "Module names")
#E.color = ifelse(E.color>0, "lightcoral",ifelse(E.color<0, "lightgreen","grey"))
#legend(-1.2, -1.1,legend = c("module1"), col = c("lightcoral"), lty = 1, lwd = 3,
# cex = 1.4,title = "Orientation Of module to Indosyl sulfate", bty = "n")
dev.off()
################### step2 取核心Otu所在的模块 ###################
### ####
#################################################################
# 将某个模块抽出来
module_sub <- fc[2]$`2` # 将属于模块XX的otu选出来
subgraph_sub <- induced_subgraph(igraph, module_sub) # 提取属于module1的网络节点图
fc2 = cluster_fast_greedy(subgraph_sub,weights =NULL)# cluster_walktrap cluster_edge_betweenness, cluster_fast_greedy, cluster_spinglass
modularity2 = modularity(subgraph_sub,membership(fc2))
# 按照模块为节点配色
comps2 = membership(fc2)
# 每个模块otus数目的多少
module_size2 = sizes(fc2)
module_size2
# display.brewer.all(type = "qual") 列出所有调色板,供选择
colbar2 = brewer.pal(3,"Set2")
modbar2 = c(paste("sub_module",seq(1,2,1),sep = ""))
V(subgraph_sub)$color = colbar2[comps2]
V(subgraph_sub)$modulname = modbar2[comps2]
#V(subgraph_sub)$color <- ifelse(V(subgraph_sub)$name == 'Otu36', "gold", "lightgoldenrod1")
#V(subgraph_sub)$color <- ifelse(V(subgraph_sub)$name %in% c('Otu38', 'Otu238'), "deepskyblue", "lightblue")
#V(subgraph_sub)$size <- ifelse(V(subgraph_sub)$name == 'Otu238', 5, 2)
tiff(paste(temp_nm5, "_otu_network_3-Indole carboxylic acid glucuronide_sub_module2.tiff", sep = ""),width = 4000,height = 4000,res = 300)
#set.seed(123)
#par(mar=c(2,2,2,4))
#l <- layout_with_fr(subgraph_sub)
#l <- layout_in_circle(subgraph_sub)
#l <- norm_coords(l, ymin = -1,ymax = 1,xmin = -1,xmax = 1)
e <- get.edgelist(subgraph_sub, names = FALSE)
#l <- qgraph.layout.fruchtermanreingold(e,vcount=vcount(subgraph_sub)) #https://stackoverflow.com/questions/39290909/igraph-resolving-tight-overlapping-nodes
#l <- qgraph.layout.fruchtermanreingold(e,vcount=vcount(subgraph_sub))
ll <- qgraph.layout.fruchtermanreingold(e,vcount=vcount(subgraph_sub),
area=10*(vcount(subgraph_sub)^2),repulse.rad=(vcount(subgraph_sub)^3.1))
#ll <- as.matrix(ll)
plot(subgraph_sub,rescale=T,layout=ll, vertex.label.cex=ifelse(V(subgraph_sub)$name %in% c('Otu238'), 1.6,1.2),
vertex.label.dist=0.5, vertex.shape=ifelse(V(subgraph_sub)$name %in% c('Otu238'), "square","circle"),
vertex.frame.color=FALSE,vertex.label.color="black",
vertex.label.degree=-0.6,edge.curved=FALSE,
vertex.label.font=1)
text(0.5, 1.1, labels = "Hub Otu: Otu238 (Prevotella copri_N)", cex=1.5)
title("Co-occurrence network with 3-Indole carboxylic acid glucuronide under Module2", cex.main=1.8)
legend("topleft",legend = levels(as.factor(unique(V(subgraph_sub)$modulname))),
fill=c("#66C2A5","#FC8D62"),bty = "n",cex=1.4,title = "Sub_module names")
legend(-1.2, -1,legend = c("sub_module1 (Otu768 excluded)"), col = c("lightcoral"), lty = 1, lwd = 3,
cex = 1.4,title = "Orientation Of module to LCA-GC", bty = "n")
dev.off()
################## step3 可以将网络属性储存到相应文件中 ############################
##### ######
################# 用于其它可视化软件的输入文件 ############################
#这种转换模式下,默认的边权重代表了 sparcc 计算的相关性(存在负值)
#由于边权重通常为正值,因此最好取个绝对值,相关性重新复制一列作为记录
E(igraph)$sparcc <- E(igraph)$weight
E(igraph)$weight <- abs(E(igraph)$weight)
#再转为其它类型的网络文件,例如
#边列表,包括节点相关性的信息、边的权重系数等
edge <- data.frame(as_edgelist(igraph))
edge_list <- data.frame(
source = edge[[1]],
target = edge[[2]],
weight = E(igraph)$weight,
sparcc = E(igraph)$sparcc
)
head(edge_list)
write.table(edge_list, paste(temp_nm5, 'network.edge_list.txt',sep = ""), sep = '\t', row.names = FALSE, quote = FALSE)
#节点属性列表,对应边列表,记录节点属性,例如
node_list <- data.frame(
id = V(igraph)$name,
label = V(igraph)$name,
degree = degree(igraph),
betweenness = betweenness(igraph),
closness = closeness(igraph)
)
head(node_list)
write.table(node_list, paste(temp_nm5, 'network.node_list.txt',sep = ""), sep = '\t', row.names = FALSE, quote = FALSE)
#graphml 格式,可使用 gephi 软件打开并进行可视化编辑
write.graph(igraph, paste(temp_nm5, 'network.graphml', sep = ""), format = 'graphml')