-
Notifications
You must be signed in to change notification settings - Fork 5
/
.Rhistory
512 lines (512 loc) · 35.8 KB
/
.Rhistory
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
ggplot(pad_sum,aes(x = PAD2_all,y = PAD4_all,size = all_hits,color = -log10(pVal)))+geom_point()+theme(text = element_text(size=20))+theme_classic()+scale_size(range = c(0, 10))+ scale_color_gradient(low = "black", high = "red")
ggplot(pad_sum,aes(x = PAD2_all,y = PAD4_all,size = all_hits,color = -log10(pVal)))+geom_point()+theme(text = element_text(size=20))+theme_classic()+scale_size(range = c(0, 10))+ scale_colour_gradient2(low = "black",mid = "red", high = "red")
ggplot(pad_sum,aes(x = PAD2_all,y = PAD4_all,size = all_hits,color = -log10(pVal)))+geom_point()+theme(text = element_text(size=20))+theme_classic()+scale_size(range = c(0, 10))+ scale_colour_gradient2(low = "black",mid = "red", high = "red" , mid = 1.303)
ggplot(pad_sum,aes(x = PAD2_all,y = PAD4_all,size = all_hits,color = -log10(pVal)))+geom_point()+theme(text = element_text(size=20))+theme_classic()+scale_size(range = c(0, 10))+ scale_colour_gradient2(low = "black",mid = "red", high = "red" , midpoint = 1.303)
ggplot(pad_sum,aes(x = PAD2_all,y = PAD4_all,size = all_hits,color = -log10(pVal)))+geom_point()+theme(text = element_text(size=20))+theme_classic()+scale_size(range = c(0, 10))+ scale_colour_gradient2(low = "black",mid = "red", high = "red" , midpoint = 2.5)
library(AVARDA)
library(data.table)
blast = fread("~/Desktop/bin2/VirScan_filtered_virus_blast_new.csv")
data = fread("~/Desktop/bin2/AVARDA_test_data.csv")
total = fread("~/Desktop/bin2/total_probability_xr2.csv")
dict = fread("~/Desktop/bin2/my_dict_shortnames.csv")
pairwise = fread("~/Desktop/bin2/unique_probabilities3.csv")
data2 = fread("~/Desktop/bin2/pairwise_enrichments_sing.csv")
data2 = data2 %>% select(1,73)
data2 = dplyr::select(data2,1,73)
tex = AVARDA_compiled(case_data = data2,blast = blast,total_prob = total,pairwise = pairwise,dict = dict,threshold = 10)
lsf.str()
search()
tex = AVARDA_compiled(case_data = data2,blast = blast,total_prob = total,pairwise = pairwise,dict = dict,threshold = 10)
parallel::detectCores()
source('~/Desktop/AVARDA/R/AVARDA_compiled.R')
tex = AVARDA_compiled(case_data = data2,blast = blast,total_prob = total,pairwise = pairwise,dict = dict,threshold = 10)
tex = AVARDA_compiled(case_data = data2,blast = blast,total_prob = total,pairwise = pairwise,dict = dict,threshold = 10,cores = 2)
library(AVARDA)
tex = AVARDA_compiled(case_data = data2,blast = blast,total_prob = total,pairwise = pairwise,dict = dict,threshold = 10,cores = 2)
devtools::install_github("hadley/devtools")
library(devtools)
load_all()
tex = AVARDA_compiled(case_data = data2,blast = blast,total_prob = total,pairwise = pairwise,dict = dict,threshold = 10,cores = 2)
tex = AVARDA_compiled(case_data = data2,blast = blast,total_prob = total,pairwise = pairwise,dict = dict,threshold = 10,cores = 2)
rownames_to_column
source('~/Desktop/AVARDA/R/AVARDA_compiled.R')
load_all()
document()
load_all()
library(data.table)
library(AVARDA)
library(tidyverse)
library(doParallel)
library(foreach)
blast = fread("~/Desktop/bin2/VirScan_filtered_virus_blast_new.csv")
data = fread("~/Desktop/bin2/AVARDA_test_data.csv")
total = fread("~/Desktop/bin2/total_probability_xr2.csv")
dict = fread("~/Desktop/bin2/my_dict_shortnames.csv")
pairwise = fread("~/Desktop/bin2/unique_probabilities3.csv")
data2 = fread("~/Desktop/bin2/pairwise_enrichments_sing.csv")
data2 = dplyr::select(data2,1,73)
tex = AVARDA_compiled(case_data = data2,blast = blast,total_prob = total,pairwise = pairwise,dict = dict,threshold = 10,cores = 2)
library(data.table)
library(AVARDA)
library(tidyverse)
library(doParallel)
library(foreach)
blast = fread("~/Desktop/bin2/VirScan_filtered_virus_blast_new.csv")
data = fread("~/Desktop/bin2/AVARDA_test_data.csv")
total = fread("~/Desktop/bin2/total_probability_xr2.csv")
dict = fread("~/Desktop/bin2/my_dict_shortnames.csv")
pairwise = fread("~/Desktop/bin2/unique_probabilities3.csv")
data2 = fread("~/Desktop/bin2/pairwise_enrichments_sing.csv")
data2 = dplyr::select(data2,1,73)
tex = AVARDA_compiled(case_data = data2,blast = blast,total_prob = total,pairwise = pairwise,dict = dict,threshold = 10,cores = 2)
library(data.table)
library(AVARDA)
blast = fread("~/Desktop/bin2/VirScan_filtered_virus_blast_new.csv")
data = fread("~/Desktop/bin2/AVARDA_test_data.csv")
total = fread("~/Desktop/bin2/total_probability_xr2.csv")
dict = fread("~/Desktop/bin2/my_dict_shortnames.csv")
pairwise = fread("~/Desktop/bin2/unique_probabilities3.csv")
data2 = fread("~/Desktop/bin2/pairwise_enrichments_sing.csv")
data2 = dplyr::select(data2,1,73)
tex = AVARDA_compiled(case_data = data2,blast = blast,total_prob = total,pairwise = pairwise,dict = dict,threshold = 10,cores = 2)
AVARDA_compiled
check()
library(devtools)
check()
tex = AVARDA_compiled(case_data = data2,blast = blast,total_prob = total,pairwise = pairwise,dict = dict,threshold = 10,cores = 2)
library(data.table)
library(AVARDA)
blast = fread("~/Desktop/bin2/VirScan_filtered_virus_blast_new.csv")
data = fread("~/Desktop/bin2/AVARDA_test_data.csv")
total = fread("~/Desktop/bin2/total_probability_xr2.csv")
dict = fread("~/Desktop/bin2/my_dict_shortnames.csv")
pairwise = fread("~/Desktop/bin2/unique_probabilities3.csv")
data2 = fread("~/Desktop/bin2/pairwise_enrichments_sing.csv")
data2 = dplyr::select(data2,1,73)
tex = AVARDA_compiled(case_data = data2,blast = blast,total_prob = total,pairwise = pairwise,dict = dict,threshold = 10,cores = 2)
library(devtools)
install_github("drmonaco/AVARDA")
library(AVARDA)
tex = AVARDA_compiled(case_data = data2,blast = blast,total_prob = total,pairwise = pairwise,dict = dict,threshold = 10,cores = 2)
load_all()
document()
tex = AVARDA_compiled(case_data = data2,blast = blast,total_prob = total,pairwise = pairwise,dict = dict,threshold = 10,cores = 2)
library(data.table)
library(AVARDA)
blast = fread("~/Desktop/bin2/VirScan_filtered_virus_blast_new.csv")
data = fread("~/Desktop/bin2/AVARDA_test_data.csv")
total = fread("~/Desktop/bin2/total_probability_xr2.csv")
dict = fread("~/Desktop/bin2/my_dict_shortnames.csv")
pairwise = fread("~/Desktop/bin2/unique_probabilities3.csv")
data2 = fread("~/Desktop/bin2/pairwise_enrichments_sing.csv")
data2 = dplyr::select(data2,1,73)
tex = AVARDA_compiled(case_data = data2,blast = blast,total_prob = total,pairwise = pairwise,dict = dict,threshold = 10,cores = 2)
library(data.table)
library(AVARDA)
# library(tidyverse)
# library(doParallel)
# library(foreach)
blast = fread("~/Desktop/bin2/VirScan_filtered_virus_blast_new.csv")
data = fread("~/Desktop/bin2/AVARDA_test_data.csv")
total = fread("~/Desktop/bin2/total_probability_xr2.csv")
dict = fread("~/Desktop/bin2/my_dict_shortnames.csv")
pairwise = fread("~/Desktop/bin2/unique_probabilities3.csv")
data2 = fread("~/Desktop/bin2/pairwise_enrichments_sing.csv")
data2 = dplyr::select(data2,1,73)
tex = AVARDA_compiled(case_data = data2,blast = blast,total_prob = total,pairwise = pairwise,dict = dict,threshold = 10,cores = 2)
blast = fread("~/Desktop/bin2/VirScan_filtered_virus_blast_new.csv")
data = fread("~/Desktop/bin2/AVARDA_test_data.csv")
total = fread("~/Desktop/bin2/total_probability_xr2.csv")
dict = fread("~/Desktop/bin2/my_dict_shortnames.csv")
pairwise = fread("~/Desktop/bin2/unique_probabilities3.csv")
data2 = fread("~/Desktop/bin2/pairwise_enrichments_sing.csv")
data2 = dplyr::select(data2,1,73)
tex = AVARDA_compiled(case_data = data2,blast = blast,total_prob = total,pairwise = pairwise,dict = dict,threshold = 10,cores = 2)
source('~/Desktop/AVARDA/R/AVARDA_compiled.R')
library(devtools)
document()
check()
load_all()
rm(list = ls())
library(data.table)
library(AVARDA)
blast = fread("~/Desktop/bin2/VirScan_filtered_virus_blast_new.csv")
data = fread("~/Desktop/bin2/AVARDA_test_data.csv")
total = fread("~/Desktop/bin2/total_probability_xr2.csv")
dict = fread("~/Desktop/bin2/my_dict_shortnames.csv")
pairwise = fread("~/Desktop/bin2/unique_probabilities3.csv")
data2 = fread("~/Desktop/bin2/pairwise_enrichments_sing.csv")
data2 = dplyr::select(data2,1,73)
tex = AVARDA_compiled(case_data = data2,blast = blast,total_prob = total,pairwise = pairwise,dict = dict,threshold = 10,cores = 2)
library(devtools)
document()
case_data.R = case_data %>% filter(.[[2]] >= threshold) %>% mutate(V1 = as.character(V1))%>% left_join(blast,by = "V1") %>% select(-2) # threshold hits matrix and create blast matrix
case_data = cae
case_data = case
case_data = data2
case_data.R = case_data %>% filter(.[[2]] >= threshold) %>% mutate(V1 = as.character(V1))%>% left_join(blast,by = "V1") %>% select(-2) # threshold hits matrix and create blast matrix
threshold = 10
case_data.R = case_data %>% filter(.[[2]] >= threshold) %>% mutate(V1 = as.character(V1))%>% left_join(blast,by = "V1") %>% select(-2) # threshold hits matrix and create blast matrix
case.binary = binary(case_data.R,80) # create binary matrix of evidence alignments
csums_index = which(case.binary[-1] %>% colSums() >= 5) + 1 # get just viruses that are enriched
case.binary.sub.evidence = case.binary %>% select(c(1,csums_index)) # subset evidence peptides above index - binary matrix of evidence peptides by alignnment
case.binary.sub.2 = (case.binary.sub.evidence [-1]%>% colSums()) %>% as.data.frame() %>% rename(peps = ".") %>% mutate(all_peptides = dim(case.binary.sub.evidence)[1]) %>%
rownames_to_column() %>% left_join(total_prob, "rowname")
total_prob = total
case_data.R = case_data %>% filter(.[[2]] >= threshold) %>% mutate(V1 = as.character(V1))%>% left_join(blast,by = "V1") %>% select(-2) # threshold hits matrix and create blast matrix
case.binary = binary(case_data.R,80) # create binary matrix of evidence alignments
csums_index = which(case.binary[-1] %>% colSums() >= 5) + 1 # get just viruses that are enriched
case.binary.sub.evidence = case.binary %>% select(c(1,csums_index)) # subset evidence peptides above index - binary matrix of evidence peptides by alignnment
case.binary.sub.2 = (case.binary.sub.evidence [-1]%>% colSums()) %>% as.data.frame() %>% rename(peps = ".") %>% mutate(all_peptides = dim(case.binary.sub.evidence)[1]) %>%
rownames_to_column() %>% left_join(total_prob, "rowname")
case.binary.sub.2 = case.binary.sub.2 %>% mutate(pVal = mapply(bt, case.binary.sub.2$peps, case.binary.sub.2$all_peptides,case.binary.sub.2$V1)) %>%
arrange(pVal)
case.binary.sub.2 = (case.binary.sub.evidence [-1]%>% colSums()) %>% as.data.frame() %>% rename(peps = ".") %>% mutate(all_peptides = dim(case.binary.sub.evidence)[1]) %>%
rownames_to_column() %>% left_join(total_prob, "rowname")
case.binary.sub.2
case.binary.sub.2 = (case.binary.sub.evidence [-1]%>% colSums()) %>% as.data.frame() %>% rename(peps = ".") %>% mutate(all_peptides = dim(case.binary.sub.evidence)[1])
View(case.binary.sub.2)
case.binary.sub.2 = (case.binary.sub.evidence [-1]%>% colSums()) %>% as.data.frame() %>% rename(peps = ".") %>%
rownames_to_column() %>% mutate(all_peptides = dim(case.binary.sub.evidence)[1]) %>% left_join(total_prob, "rowname")
load_all()
source('~/Desktop/AVARDA/R/AVARDA_compiled.R')
load_all()
library(data.table)
library(AVARDA)
# library(tidyverse)
# library(doParallel)
# library(foreach)
blast = fread("~/Desktop/bin2/VirScan_filtered_virus_blast_new.csv")
data = fread("~/Desktop/bin2/AVARDA_test_data.csv")
total = fread("~/Desktop/bin2/total_probability_xr2.csv")
dict = fread("~/Desktop/bin2/my_dict_shortnames.csv")
pairwise = fread("~/Desktop/bin2/unique_probabilities3.csv")
data2 = fread("~/Desktop/bin2/pairwise_enrichments_sing.csv")
data2 = dplyr::select(data2,1,73)
tex = AVARDA_compiled(case_data = data2,blast = blast,total_prob = total,pairwise = pairwise,dict = dict,threshold = 10,cores = 2)
#
# x = Module_1(data2[,1:2],blast,total,threshold = 10)
#
# x2 = Module_2(x =x[[3]] ,dict = dict,total = total)
#
# x3 = Module_3(x,x2,blast,total,pairwise,dict)
#
# x4 =sapply(x3,compiler,blast = blast,total_prob = total,dict = dict) %>% t() %>% as.data.frame() %>% mutate(pBH = p.adjust(pVal))
#
library(devtools)
document()
load_all()
blast = fread("~/Desktop/bin2/Phageome/avarda_virus.csv")
data = fread("~/Desktop/bin2/Phageome/phipseq_0140_PhageomeLa_001_Hits_foldchange.tsv")
total = fread("~/Desktop/bin2/Phageome/total_probs_phageome_virus.csv")
dict = fread("~/Desktop/bin2/Phageome/blast_dictionary_phageome.csv")
library(data.table)
library(AVARDA)
blast = fread("~/Desktop/bin2/Phageome/avarda_virus.csv")
data = fread("~/Desktop/bin2/Phageome/phipseq_0140_PhageomeLa_001_Hits_foldchange.tsv")
total = fread("~/Desktop/bin2/Phageome/total_probs_phageome_virus.csv")
dict = fread("~/Desktop/bin2/Phageome/blast_dictionary_phageome.csv")
pairwise = fread("~/Desktop/bin2/unique_probabilities3.csv")
data2 = fread("~/Desktop/bin2/pairwise_enrichments_sing.csv")
data2 = dplyr::select(data2,1,73)
tex = AVARDA_compiled(case_data = data2,blast = blast,total_prob = total,pairwise = pairwise,dict = dict,threshold = 10,cores = 2)
View(blast)
View(total)
View(data2)
data2 = dplyr::select(data,1,73)
View(data2)
blast = fread("~/Desktop/bin2/Phageome/avarda_virus.csv")
data = fread("~/Desktop/bin2/Phageome/phipseq_0140_PhageomeLa_001_Hits_foldchange.tsv") %>% rename(V1 = "u_pep_id")
data2 = dplyr::select(data,1,73)
tex = AVARDA_compiled(case_data = data2,blast = blast,total_prob = total,pairwise = pairwise,dict = dict,threshold = 10,cores = 2)
View(data2)
data = fread("~/Desktop/bin2/AVARDA_test_data.csv")
head(data)
colnames(data)
colnames(data2)
colnames(datadict)
colnames(dict)
colnames(total)
colnames(blast)
blast = fread("~/Desktop/bin2/VirScan_filtered_virus_blast_new.csv")
colnames(blast)
blast = fread("~/Desktop/bin2/Phageome/avarda_virus.csv") %>% rename(V1 = "u_pep_id")
data = fread("~/Desktop/bin2/Phageome/phipseq_0140_PhageomeLa_001_Hits_foldchange.tsv") %>% rename(V1 = "u_pep_id")
total = fread("~/Desktop/bin2/Phageome/total_probs_phageome_virus.csv")
data2 = dplyr::select(data,1,73)
tex = AVARDA_compiled(case_data = data2,blast = blast,total_prob = total,pairwise = pairwise,dict = dict,threshold = 10,cores = 2)
data.R = data2 %>% select(1,2)
colnames(data.R)[2]
#data2 = fread("~/Desktop/bin2/pairwise_enrichments_sing.csv")
threshold = 5
x = Module_1(case_data = data.R,blast = blast,total_prob = total_prob,threshold = threshold)
blast = fread("~/Desktop/bin2/Phageome/avarda_virus.csv") %>% rename(V1 = "u_pep_id")
data = fread("~/Desktop/bin2/Phageome/phipseq_0140_PhageomeLa_001_Hits_foldchange.tsv") %>% rename(V1 = "u_pep_id")
total_prob = fread("~/Desktop/bin2/Phageome/total_probs_phageome_virus.csv")
dict = fread("~/Desktop/bin2/Phageome/blast_dictionary_phageome.csv")
pairwise = fread("~/Desktop/bin2/unique_probabilities3.csv")
#data2 = fread("~/Desktop/bin2/pairwise_enrichments_sing.csv")
threshold = 5
data2 = dplyr::select(data,1,73)
data.R = data2 %>% select(1,2)
colnames(data.R)[2]
x = Module_1(case_data = data.R,blast = blast,total_prob = total_prob,threshold = threshold)
if(dim(x[[2]])[1] != 0){
x2 = Module_2(x =x[[3]] ,dict = dict,total_prob = total_prob)
print("Module2_finished")
}
x2 = Module_2(x =x[[3]] ,dict = dict,total_prob = total_prob)
data.R = data2 %>% select(1,2)
colnames(data.R)[2]
x = Module_1(case_data = data.R,blast = blast,total_prob = total_prob,threshold = threshold)
print("Module1_finished")
x2 = Module_2(x =x[[3]] ,dict = dict,total_prob = total_prob)
print("Module2_finished")
Module_2
knitr::opts_chunk$set(echo = TRUE)
ccp = fread("~/Downloads/CCP serology.csv") %>% dplyr::filter(`CCP Status` == "(+)")
library(MASS)
library(tidyverse)
library(data.table)
library(ggbeeswarm)
library(pheatmap)
library(ggplot2)
library(ggpubr)
library(knitr)
library(igraph)
ccp = fread("~/Downloads/CCP serology.csv") %>% dplyr::filter(`CCP Status` == "(+)")
omega2 = fread("~/omega2.csv")
omega3 = fread("~/omega3.csv")
pad4 = fread("~/Downloads/OneDrive_1_1-14-2021/Unmod_PAD4_NM_zscores.txt") %>% dplyr::select(c(V1,ccp$`Patient ID`))
pad2 = fread("~/Downloads/OneDrive_1_1-14-2021/Unmod_PAD2_NM_zscores.txt") %>% dplyr::select(c(V1,ccp$`Patient ID`))
colnames(pad2) = paste0("PAD2_",colnames(pad2))
colnames(pad4) = paste0("PAD4_",colnames(pad4))
matrix = cbind(pad2,pad4) %>% as.data.frame() %>% dplyr::select(-PAD4_V1)%>% filter( PAD2_V1 %in% (omega2 %>% unlist %>% as.character)) %>% column_to_rownames("PAD2_V1")
matrix[matrix<7] = 0
matrix[matrix>=7] = 1
matrix2 = matrix %>% rownames_to_column()%>% right_join(omega3)
matrix.3 = cbind(pad2,pad4) %>% as.data.frame() %>% dplyr::select(-PAD4_V1) %>% as.data.frame()
edge = fread("omega_blast_x.csv") %>% select(qseqid,sseqid,evalue) %>% filter(evalue < 1) %>% select(-evalue)
filter_func = function(edge,vertex){ #independence filter that takes a dictionary (defined above) and a set of nodes and tells the minimal number of unique epitopes
nodes = unlist(vertex)
links_filtered = edge %>% filter(qseqid %in% nodes)
links_filtered = links_filtered %>% filter(sseqid %in% nodes)
if(dim(links_filtered)[1]!=0){
net <- as.undirected(graph_from_data_frame(d=links_filtered,vertices=nodes, directed=F) )
x = decompose.graph(net)
x_1 = x[sapply(x,vcount)<30]
x_1_sum = sum(unlist(lapply(x_1,independence.number)))
x_2 = x[sapply(x,vcount)>=30]
temp = c()
#x_2 = x
if(length(x_2) >0){
for(R in 1:length(x_2)){
x_2_r = x_2[[R]]
while(max(degree(x_2_r)>5)){
toss = degree(x_2_r)==max(degree(x_2_r))
x_2_r = delete_vertices(x_2_r, V(x_2_r)[toss])
}
x_l = decompose.graph(x_2_r)
temp[R] = sum(unlist(lapply(x_l,independence.number)))
}
}
return(sum(x_1_sum)+sum(temp))
}
if(dim(links_filtered)[1]==0){
return(length(nodes))
}
}
df = data.frame(matrix(nrow = 0,ncol = 7))
colnames(df) = c("ID","PAD2_all","PAD4_all","PAD2_ind","PAD4_ind","PAD2_total","PAD4_total")
for(R in 2:(dim(matrix2)[2]-1)){
name = colnames(matrix2)[R]
sample.R = matrix2[,c(1,R,44)] %>% rename(Sample =name)%>% filter(Sample ==1)
sample.R.pad2 = sample.R %>% filter(PAD == "PAD2")
sample.R.pad4 = sample.R %>% filter(PAD == "PAD4")
pad2_ind = filter_func(edge,sample.R.pad2$rowname)
pad4_ind = filter_func(edge,sample.R.pad4$rowname)
matrix.3.R = matrix.3 %>% select(name) %>% filter(. >=7)
if(grepl("PAD2",name)){
PAD2_total = dim(matrix.3.R)[1]
PAD4_total = 0
}
if(grepl("PAD4",name)){
PAD4_total = dim(matrix.3.R)[1]
PAD2_total = 0
}
df[R-1,] = c(name,dim(sample.R.pad2)[1],dim(sample.R.pad4)[1],pad2_ind,pad4_ind,PAD2_total,PAD4_total)
}
bt <- function(a, b, p = 0.5) {binom.test(a, b, 0.5, alternative=
c("two.sided"), conf.level = 0.95)$p.value}
pad_sum = df %>% mutate(ID.2 = substring(ID,6,10)) %>% select(-ID) %>% group_by(ID.2) %>% summarise_all(max) %>% mutate(across(c(-ID.2), as.numeric)) %>% mutate(all_hits = PAD2_total+PAD4_total) %>% mutate(test = PAD2_ind+PAD4_ind) %>% mutate(test = ifelse(test == 0,1,test))
pad_sum$pVal <- mapply(bt, pad_sum$PAD2_ind, pad_sum$test)
ggplot(pad_sum,aes(x = PAD2_all,y = PAD4_all,size = all_hits,color = -log10(pVal)))+geom_point()+theme(text = element_text(size=20))+theme_classic()+scale_size(range = c(0, 10))+ scale_colour_gradient2(low = "black",mid = "red", high = "red" , midpoint = 2.5)
ggplot(pad_sum,aes(x = PAD2_all,y = PAD4_all,size = all_hits,color = -log10(pVal)))+geom_point()+theme(text = element_text(size=20))+theme_classic()+scale_size(range = c(0, 10))+ scale_colour_gradient2(low = "black",mid = "orange", high = "red" , midpoint = 2.5)
ggplot(pad_sum,aes(x = PAD2_all,y = PAD4_all,size = all_hits,color = -log10(pVal)))+geom_point()+theme(text = element_text(size=20))+theme_classic()+scale_size(range = c(0, 10))+ scale_colour_gradient2(low = "black",mid = "orange", high = "red" , midpoint = 2.5)+theme(text = element_text(size=20)
ggplot(pad_sum,aes(x = PAD2_all,y = PAD4_all,size = all_hits,color = -log10(pVal)))+geom_point()+theme(text = element_text(size=20))+theme_classic()+scale_size(range = c(0, 10))+ scale_colour_gradient2(low = "black",mid = "orange", high = "red" , midpoint = 2.5)+theme(text = element_text(size=20))
ggplot(pad_sum,aes(x = PAD2_all,y = PAD4_all,size = all_hits,color = -log10(pVal)))+geom_point()+theme(text = element_text(size=20))+theme_classic()+scale_size(range = c(0, 10))+ scale_colour_gradient2(low = "black",mid = "orange", high = "red" , midpoint = 2.5)+theme(text = element_text(size=20))
ggplot(pad_sum,aes(x = PAD2_all,y = PAD4_all,size = all_hits,color = -log10(pVal)))+geom_point()+theme(text = element_text(size=20))+theme_classic()+scale_size(range = c(0, 10))+ scale_colour_gradient2(low = "yellow",mid = "orange", high = "red" , midpoint = 2.5)+theme(text = element_text(size=20))
ggplot(pad_sum,aes(x = PAD2_all,y = PAD4_all,size = all_hits,color = -log10(pVal)))+geom_point()+theme(text = element_text(size=20))+theme_classic()+scale_size(range = c(0, 10))+ scale_colour_gradient2(low = "yellow",mid = "orange", high = "red" , midpoint = 1.313)+theme(text = element_text(size=20))
ggplot(pad_sum,aes(x = PAD2_all,y = PAD4_all,size = all_hits,color = -log10(pVal)))+geom_point()+theme(text = element_text(size=20))+theme_classic()+scale_size(range = c(0, 10))+ scale_colour_gradient2(low = "#91bfdb",mid = "#ffffbf", high = "#fc8d59" , midpoint = 1.313)+theme(text = element_text(size=20))
ggplot(pad_sum,aes(x = PAD2_all,y = PAD4_all,size = all_hits,color = -log10(pVal)))+geom_point()+theme(text = element_text(size=20))+theme_classic()+scale_size(range = c(0, 10))+ scale_colour_gradient2(low = "#91bfdb",mid = "#ffffbf", high = "#fc8d59" , midpoint = 1.313)+theme(text = element_text(size=20))
ggplot(pad_sum,aes(x = PAD2_all,y = PAD4_all,size = all_hits,color = -log10(pVal)))+geom_point()+theme(text = element_text(size=20))+theme_classic()+scale_size(range = c(0, 10))+ scale_colour_gradient2(low = "#91bfdb",mid = "#ffffbf", high = "#fc8d59" , midpoint = 1.313,space = "rgb")+theme(text = element_text(size=20))
?scale_colour_gradient2
ggplot(pad_sum,aes(x = PAD2_all,y = PAD4_all,size = all_hits,color = -log10(pVal)))+geom_point()+theme(text = element_text(size=20))+theme_classic()+scale_size(range = c(0, 10))+ scale_colour_gradient2(midpoint = 1.313,space =)+theme(text = element_text(size=20))
ggplot(pad_sum,aes(x = PAD2_all,y = PAD4_all,size = all_hits,color = -log10(pVal)))+geom_point()+theme(text = element_text(size=20))+theme_classic()+scale_size(range = c(0, 10))+ scale_colour_gradient2(midpoint = 1.313,space =)+theme(text = element_text(size=20))
ggplot(pad_sum,aes(x = PAD2_all,y = PAD4_all,size = all_hits,color = -log10(pVal)))+geom_point()+theme(text = element_text(size=20))+theme_classic()+scale_size(range = c(0, 10))+ scale_colour_gradient2(low = "#2c7bb6",high = "#d7191c"
midpoint = 1.313,space =)+theme(text = element_text(size=20))
ggplot(pad_sum,aes(x = PAD2_all,y = PAD4_all,size = all_hits,color = -log10(pVal)))+geom_point()+theme(text = element_text(size=20))+theme_classic()+scale_size(range = c(0, 10))+ scale_colour_gradient2(low = "#2c7bb6",high = "#d7191c",
midpoint = 1.313,space =)+theme(text = element_text(size=20))
ggplot(pad_sum,aes(x = PAD2_all,y = PAD4_all,size = all_hits,color = -log10(pVal)))+geom_point()+theme(text = element_text(size=20))+theme_classic()+scale_size(range = c(0, 10))+ scale_colour_gradient2(low = "#2c7bb6",mid = "yellow",high = "#d7191c",
midpoint = 1.313,space =)+theme(text = element_text(size=20))
ggplot(pad_sum,aes(x = PAD2_all,y = PAD4_all,size = all_hits,color = -log10(pVal)))+geom_point()+theme(text = element_text(size=20))+theme_classic()+scale_size(range = c(0, 10))+ scale_colour_gradient2(low = "#2c7bb6",mid = "yellow",high = "#d7191c",
midpoint = 1.313,space =)+theme(text = element_text(size=20))
ggplot(pad_sum,aes(x = PAD2_all,y = PAD4_all,size = all_hits,color = -log10(pVal)))+geom_point()+theme(text = element_text(size=20))+theme_classic()+scale_size(range = c(0, 10))+ scale_colour_gradient2(low = "#2c7bb6",mid = "black",high = "#d7191c",
midpoint = 1.313,space =)+theme(text = element_text(size=20))
ggplot(pad_sum,aes(x = PAD2_all,y = PAD4_all,size = all_hits,color = -log10(pVal)))+geom_point()+theme(text = element_text(size=20))+theme_classic()+scale_size(range = c(0, 10))+ scale_colour_gradient2(low = "#2c7bb6",mid = "black",high = "#d7191c",
midpoint = 1.313,breaks=seq(0,.25,.5,.75,1,2,3,4,5))+theme(text = element_text(size=20))
ggplot(pad_sum,aes(x = PAD2_all,y = PAD4_all,size = all_hits,color = -log10(pVal)))+geom_point()+theme(text = element_text(size=20))+theme_classic()+scale_size(range = c(0, 10))+ scale_colour_gradient2(low = "#2c7bb6",mid = "black",high = "#d7191c",
midpoint = 1.313,breaks=c(0,.25,.5,.75,1,2,3,4,5))+theme(text = element_text(size=20))
ggplot(pad_sum,aes(x = PAD2_all,y = PAD4_all,size = all_hits,color = -log10(pVal)))+geom_point()+theme(text = element_text(size=20))+theme_classic()+scale_size(range = c(0, 10))+ scale_colour_gradient2(low = "#2c7bb6",mid = "black",high = "#d7191c",
midpoint = 1.313,breaks=c(0,.25,.5,.75,1,2,3,4,5))+theme(text = element_text(size=20))
log(.05)
log10(.05)
ggplot(pad_sum,aes(x = PAD2_all,y = PAD4_all,size = all_hits,color = -log10(pVal)))+geom_point()+theme(text = element_text(size=20))+theme_classic()+scale_size(range = c(0, 10))+
scale_fill_gradient2(low = "#0000FF", mid = "#FFFFFF", high ="#FF0000",
midpoint = 1.30103, space = "rgb",
guide = "colourbar",
limits = c(min(v$value), max(v$value)),
breaks=pretty_breaks(n=4)(min(v$value):max(v$value)))+theme(text = element_text(size=20))
library(scales)
ggplot(pad_sum,aes(x = PAD2_all,y = PAD4_all,size = all_hits,color = -log10(pVal)))+geom_point()+theme(text = element_text(size=20))+theme_classic()+scale_size(range = c(0, 10))+
scale_fill_gradient2(low = "#0000FF", mid = "#FFFFFF", high ="#FF0000",
midpoint = 1.30103, space = "rgb",
guide = "colourbar",
limits = c(min(v$value), max(v$value)),
breaks=pretty_breaks(n=4)(min(v$value):max(v$value)))+theme(text = element_text(size=20))
min(-log10(pad_sum$pVal)):max(-log10(pad_sum$pVal))
ggplot(pad_sum,aes(x = PAD2_all,y = PAD4_all,size = all_hits,color = -log10(pVal)))+geom_point()+theme(text = element_text(size=20))+theme_classic()+scale_size(range = c(0, 10))+
scale_fill_gradient2(low = "#0000FF", mid = "#FFFFFF", high ="#FF0000",
midpoint = 1.30103, space = "rgb",
guide = "colourbar",
limits = c(min(-log10(pad_sum$pVal)), max(-log10(pad_sum$pVal))),
breaks=pretty_breaks(n=4)(min(-log10(pad_sum$pVal)):max(-log10(pad_sum$pVal))))+theme(text = element_text(size=20))
ggplot(pad_sum,aes(x = PAD2_all,y = PAD4_all,size = all_hits,color = -log10(pVal)))+geom_point()+theme(text = element_text(size=20))+theme_classic()+scale_size(range = c(0, 10))+
scale_fill_gradient2(low = "#0000FF", mid = "#FFFFFF", high ="#FF0000",
midpoint = 1.30103,
limits = c(min(-log10(pad_sum$pVal)), max(-log10(pad_sum$pVal))),
breaks=pretty_breaks(n=4)(min(-log10(pad_sum$pVal)):max(-log10(pad_sum$pVal))))+theme(text = element_text(size=20))
pretty_breaks(n=4)(min(-log10(pad_sum$pVal)):max(-log10(pad_sum$pVal))))
pretty_breaks(n=4)(min(-log10(pad_sum$pVal)):max(-log10(pad_sum$pVal)))
pad_sum = pad_sum %>% mutate(color= ifelse(pVal <.05,"sig","insig"))
ggplot(pad_sum,aes(x = PAD2_all,y = PAD4_all,size = all_hits,color = color))+geom_point()+theme(text = element_text(size=20))+theme_classic()+scale_size(range = c(0, 10))+
ggplot(pad_sum,aes(x = PAD2_all,y = PAD4_all,size = all_hits,color = color))+geom_point()+theme(text = element_text(size=20))+theme_classic()+scale_size(range = c(0, 10))
ggplot(pad_sum,aes(x = PAD2_all,y = PAD4_all,size = all_hits,color = color))+geom_point()+theme(text = element_text(size=20))+theme_classic()+scale_size(range = c(0, 10))+ scale_color_manual(values=c("black", "red")
ggplot(pad_sum,aes(x = PAD2_all,y = PAD4_all,size = all_hits,color = color))+geom_point()+theme(text = element_text(size=20))+theme_classic()+scale_size(range = c(0, 10))+ scale_color_manual(values=c("black", "red"))
ggplot(pad_sum,aes(x = PAD2_all,y = PAD4_all,size = all_hits,color = color))+geom_point()+theme(text = element_text(size=20))+theme_classic()+scale_size(range = c(0, 10))+ scale_color_manual(values=c("black", "red"))
ggplot(pad_sum,aes(x = PAD2_all,y = PAD4_all,size = all_hits,color = color))+geom_point()+theme(text = element_text(size=20))+theme_classic()+scale_size(range = c(0, 10))+ scale_color_manual(values=c("black", "red"))+theme(text = element_text(size=20))
ggplot(pad_sum,aes(x = PAD2_all,y = PAD4_all,size = all_hits,color = pVal))+geom_point()+theme(text = element_text(size=20))+theme_classic()+scale_size(range = c(0, 10))+theme(text = element_text(size=20)) + scale_fill_gradient(name = "count", trans = "log")
ggplot(pad_sum,aes(x = PAD2_all,y = PAD4_all,size = all_hits,color = pVal))+geom_point()+theme(text = element_text(size=20))+theme_classic()+scale_size(range = c(0, 10))+theme(text = element_text(size=20)) + scale_fill_gradient(name = "count", trans = "log",scale = "trans")
ggplot(pad_sum,aes(x = PAD2_all,y = PAD4_all,size = all_hits,color = pVal))+geom_point()+theme(text = element_text(size=20))+theme_classic()+scale_size(range = c(0, 10))+theme(text = element_text(size=20)) + scale_fill_gradient(name = "count", trans = "log", trans = "log")
ggplot(pad_sum,aes(x = PAD2_all,y = PAD4_all,size = all_hits,color = pVal))+geom_point()+theme(text = element_text(size=20))+theme_classic()+scale_size(range = c(0, 10))+theme(text = element_text(size=20)) + scale_fill_gradient(name = "count", trans = "log")
ggplot(pad_sum,aes(x = PAD2_all,y = PAD4_all,size = all_hits,color = pVal))+geom_point()+theme(text = element_text(size=20))+theme_classic()+scale_size(range = c(0, 10))+theme(text = element_text(size=20)) + scale_fill_gradient(name = "count", trans = "log",my_breaks = c(2, 10, 50, 250, 1250, 6000))
ggplot(pad_sum,aes(x = PAD2_all,y = PAD4_all,size = all_hits,color = pVal))+geom_point()+theme(text = element_text(size=20))+theme_classic()+scale_size(range = c(0, 10))+theme(text = element_text(size=20)) + scale_fill_gradient(name = "count", trans = "log",my_breaks = c(1, .1, .01, .001, .0001, .00001,.000001))
ggplot(pad_sum,aes(x = PAD2_all,y = PAD4_all,size = all_hits,color = pVal))+geom_point()+theme(text = element_text(size=20))+theme_classic()+scale_size(range = c(0, 10))+theme(text = element_text(size=20)) + scale_fill_gradient(name = "count", trans = "log",breaks = c(1, .1, .01, .001, .0001, .00001,.000001))
ggplot(pad_sum,aes(x = PAD2_all,y = PAD4_all,size = all_hits))+geom_point()+theme(text = element_text(size=20))+theme_classic()+scale_size(range = c(0, 10))+theme(text = element_text(size=20)) + scale_fill_gradient(name = "count", trans = "log",breaks = c(1, .1, .01, .001, .0001, .00001,.000001))
my_breaks = c(2, 10, 50, 250, 1250, 6000)
ggplot(pad_sum,aes(x = PAD2_all,y = PAD4_all,size = all_hits))+geom_point()+theme(text = element_text(size=20))+theme_classic()+scale_size(range = c(0, 10))+theme(text = element_text(size=20)) + scale_fill_gradient(name = "count", trans = "log",
breaks = my_breaks, labels = my_breaks)
ggplot(pad_sum,aes(x = PAD2_all,y = PAD4_all,size = all_hits,color = pVal))+geom_point()+theme(text = element_text(size=20))+theme_classic()+scale_size(range = c(0, 10))+theme(text = element_text(size=20)) + scale_fill_gradient(name = "count", trans = "log",
breaks = my_breaks, labels = my_breaks)
ggplot(pad_sum,aes(x = PAD2_all,y = PAD4_all,size = all_hits,color = pVal))+geom_point()+theme(text = element_text(size=20))+theme_classic()+scale_size(range = c(0, 10))+theme(text = element_text(size=20)) + scale_fill_gradient(name = "count", trans = "log",breaks = my_breaks, labels = my_breaks)
ggplot(pad_sum,aes(x = PAD2_all,y = PAD4_all,size = all_hits,color = pVal))+geom_point()+theme(text = element_text(size=20))+theme_classic()+scale_size(range = c(0, 10))+theme(text = element_text(size=20)) + scale_fill_gradient(name = "count", breaks = my_breaks, labels = my_breaks)
ccp = fread("~/Downloads/CCP serology.csv") %>% dplyr::filter(`CCP Status` == "(+)")
omega2 = fread("~/omega2.csv")
omega3 = fread("~/omega3.csv")
pad4 = fread("~/Downloads/OneDrive_1_1-14-2021/Unmod_PAD4_NM_zscores.txt") %>% dplyr::select(c(V1,ccp$`Patient ID`))
pad2 = fread("~/Downloads/OneDrive_1_1-14-2021/Unmod_PAD2_NM_zscores.txt") %>% dplyr::select(c(V1,ccp$`Patient ID`))
colnames(pad2) = paste0("PAD2_",colnames(pad2))
colnames(pad4) = paste0("PAD4_",colnames(pad4))
matrix = cbind(pad2,pad4) %>% as.data.frame() %>% dplyr::select(-PAD4_V1)%>% filter( PAD2_V1 %in% (omega2 %>% unlist %>% as.character)) %>% column_to_rownames("PAD2_V1")
matrix[matrix<7] = 0
matrix[matrix>=7] = 1
matrix2 = matrix %>% rownames_to_column()%>% right_join(omega3)
matrix.3 = cbind(pad2,pad4) %>% as.data.frame() %>% dplyr::select(-PAD4_V1) %>% as.data.frame()
edge = fread("omega_blast_x.csv") %>% select(qseqid,sseqid,evalue) %>% filter(evalue < 1) %>% select(-evalue)
filter_func = function(edge,vertex){ #independence filter that takes a dictionary (defined above) and a set of nodes and tells the minimal number of unique epitopes
nodes = unlist(vertex)
links_filtered = edge %>% filter(qseqid %in% nodes)
links_filtered = links_filtered %>% filter(sseqid %in% nodes)
if(dim(links_filtered)[1]!=0){
net <- as.undirected(graph_from_data_frame(d=links_filtered,vertices=nodes, directed=F) )
x = decompose.graph(net)
x_1 = x[sapply(x,vcount)<30]
x_1_sum = sum(unlist(lapply(x_1,independence.number)))
x_2 = x[sapply(x,vcount)>=30]
temp = c()
#x_2 = x
if(length(x_2) >0){
for(R in 1:length(x_2)){
x_2_r = x_2[[R]]
while(max(degree(x_2_r)>5)){
toss = degree(x_2_r)==max(degree(x_2_r))
x_2_r = delete_vertices(x_2_r, V(x_2_r)[toss])
}
x_l = decompose.graph(x_2_r)
temp[R] = sum(unlist(lapply(x_l,independence.number)))
}
}
return(sum(x_1_sum)+sum(temp))
}
if(dim(links_filtered)[1]==0){
return(length(nodes))
}
}
df = data.frame(matrix(nrow = 0,ncol = 7))
colnames(df) = c("ID","PAD2_all","PAD4_all","PAD2_ind","PAD4_ind","PAD2_total","PAD4_total")
for(R in 2:(dim(matrix2)[2]-1)){
name = colnames(matrix2)[R]
sample.R = matrix2[,c(1,R,44)] %>% rename(Sample =name)%>% filter(Sample ==1)
sample.R.pad2 = sample.R %>% filter(PAD == "PAD2")
sample.R.pad4 = sample.R %>% filter(PAD == "PAD4")
pad2_ind = filter_func(edge,sample.R.pad2$rowname)
pad4_ind = filter_func(edge,sample.R.pad4$rowname)
matrix.3.R = matrix.3 %>% select(name) %>% filter(. >=7)
if(grepl("PAD2",name)){
PAD2_total = dim(matrix.3.R)[1]
PAD4_total = 0
}
if(grepl("PAD4",name)){
PAD4_total = dim(matrix.3.R)[1]
PAD2_total = 0
}
df[R-1,] = c(name,dim(sample.R.pad2)[1],dim(sample.R.pad4)[1],pad2_ind,pad4_ind,PAD2_total,PAD4_total)
}
bt <- function(a, b, p = 0.5) {binom.test(a, b, 0.5, alternative=
c("two.sided"), conf.level = 0.95)$p.value}
pad_sum = df %>% mutate(ID.2 = substring(ID,6,10)) %>% select(-ID) %>% group_by(ID.2) %>% summarise_all(max) %>% mutate(across(c(-ID.2), as.numeric)) %>% mutate(all_hits = PAD2_total+PAD4_total) %>% mutate(test = PAD2_ind+PAD4_ind) %>% mutate(test = ifelse(test == 0,1,test))
pad_sum$pVal <- mapply(bt, pad_sum$PAD2_ind, pad_sum$test)
pad_sum = pad_sum %>% mutate(color= ifelse(pVal <.05,"sig","insig"))
# ggplot(pad_sum,aes(x = PAD2_all,y = PAD4_all,size = all_hits,color = color))+geom_point()+theme(text = element_text(size=20))+theme_classic()+scale_size(range = c(0, 10))+ scale_color_manual(values=c("black", "red"))+theme(text = element_text(size=20))
ggplot(pad_sum,aes(x = PAD2_all,y = PAD4_all,size = all_hits,color = pVal))+geom_point()+theme(text = element_text(size=20))+theme_classic()+scale_size(range = c(0, 10))+theme(text = element_text(size=20)) + scale_fill_gradient(name = "count", trans = "log",breaks = my_breaks, labels = my_breaks)
my_breaks = c(2, 10, 50, 250, 1250, 6000)
ggplot(pad_sum,aes(x = PAD2_all,y = PAD4_all,size = all_hits,color = pVal))+geom_point()+theme(text = element_text(size=20))+theme_classic()+scale_size(range = c(0, 10)) + scale_fill_gradient(name = "count", trans = "log",breaks = my_breaks, labels = my_breaks)
ggplot(pad_sum,aes(x = PAD2_all,y = PAD4_all,size = all_hits,color = pVal))+geom_point()+scale_size(range = c(0, 10)) + scale_fill_gradient(name = "count", trans = "log",breaks = my_breaks, labels = my_breaks)+theme(text = element_text(size=20))+theme_classic()
?scale_gradient
library(MASS)
library(tidyverse)
library(data.table)
library(ggbeeswarm)
library(pheatmap)
library(ggplot2)
library(ggpubr)
library(knitr)
library(igraph)
ggplot(pad_sum,aes(x = PAD2_all,y = PAD4_all,size = all_hits,color = pVal))+geom_point()+scale_size(range = c(0, 10)) + scale_fill_gradient(name = "count", trans = "log",breaks = my_breaks, labels = my_breaks)+theme(text = element_text(size=20))+theme_classic()
n <- 1e5
df <- data.frame(x = rexp(n), y = rexp(n))
p <- ggplot(df, aes(x = x, y = y)) + stat_binhex()
print(p)
p + scale_fill_gradient(name = "count", trans = "log",
breaks = my_breaks, labels = my_breaks)
p