-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.m
144 lines (126 loc) · 6.6 KB
/
main.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
%% main
% load off target data
[original_sites, off_target_sites, scores, copy_number, pam_sites] = load_off_target_data();
% load primary data
[average_binding_data, average_sgRNA_Tarasava, average_host_Tarasava] = ...
load_avg_data;
% calc sd for each data point
[binding_frequency, Data_matrix_SgRNA, Data_matrix_Host,...
Data_vector_DataSet, Data_vector_CasType] = load_seq_data;
data_instance = [binding_frequency(1:30),binding_frequency(31:60),binding_frequency(61:90)];
sd = ((max(data_instance') - min(data_instance'))./4).^.5;
%% initialization of parameters
[r_crRNA, Delta_crRNA, r_Cas9, Delta_Cas9, Delta_complex, k_I, k_f, Lambda, D, V, k_d, k_c, mu] = initialize_parameters();
ODE_parameters = [r_crRNA, Delta_crRNA, r_Cas9, Delta_Cas9, Delta_complex, k_I, k_f, Lambda, D, V, k_d, k_c, mu]';
integration_time = (10^4); % now consider steady state conditions
integration_accuracy = 1; % mutiple of 1 evaluation per minute
Cas_concentration = 25; % concentration of Cas molecule injected to start nanoleters
crRNA_concentration = 25; % concentration of crRNA molecule injected to start nanoleters
on_target_copy_number = 1; % number of duplicates of a host site (off target)
off_target_copy_number = copy_number; % number of duplicates of a host site (off target)
percent_off_target = 1; % written as percentage
%[GG, GC, GA, GT, CG, CC, CA, CT, AG, AC, AA, AT, TG, TC, TA, TT ];
% second value is the gRNA first is the host
match_mismatch_params = -1*[-1.97, -2.70, -1.66, -2.04,...
-1.44, -1.97, -0.78, -1.29,...
-1.29, -2.04, -1.04, -1.27,...
-0.78, -1.66, -0.12, -1.04]';
% ['ATG', 'GAG', 'GGA', 'GAA', 'GAT']
pam_parameters = -1*[-6.6, -6.9, -9.4, -6.8, -6.9]/max(-1*[-6.6, -6.9, -9.4, -6.8, -6.9]);
% train model parameters
max_iterations = 2*10^3;
Number_of_Nucleotides = 32;
stopping_Tol = .1;
off_target_weight = .00; % the error from off targeting will be worth 1/10 of the true data
old_info = 'model_parameters_free.mat';
%% train
% call function
use_old = 1;
[model_parameters, just_trained] = train_model(average_host_Tarasava, average_sgRNA_Tarasava,...
average_binding_data, max_iterations,...
use_old, Number_of_Nucleotides,...
old_info, ODE_parameters, off_target_sites,...
integration_time,integration_accuracy,...
Cas_concentration, crRNA_concentration, ...
on_target_copy_number, off_target_copy_number, pam_sites, ...
percent_off_target, off_target_weight, stopping_Tol);
%save parameters
model_parameters_free = model_parameters;
save(old_info, 'model_parameters_free')
save('model_parameters_free1.mat', 'just_trained')
disp(model_parameters_free)
%% test values
add_plot = 0;
load(old_info)
percent_off_target = 1;
[error, P_on_target, P__binding_rate] = test_model(average_host_Tarasava, average_sgRNA_Tarasava,...
average_binding_data, just_trained, Number_of_Nucleotides, ...
off_target_sites, integration_time, integration_accuracy,...
Cas_concentration, crRNA_concentration, on_target_copy_number, ...
off_target_copy_number, off_target_weight, ODE_parameters, ...
pam_sites, add_plot);
close all
% accuracy plot on real data
hold on
figure(1)
b = bar(linspace(1,length(P__binding_rate),length(P__binding_rate)),[P__binding_rate average_binding_data]);
b(2).FaceColor = 'r';
errorbar(1:30, average_binding_data, sd, '-')
title('observed and predicted binding')
xlabel('distance from current position backward')
ylabel('function evaluation')
title('True versus Predicted Bind ing within dCas9 System')
xlabel('Data Point')
ylabel('Predicted Binding')
%% reduce number of parameters via fourier series
current_parameters = just_trained(6:end);
[a,b,yfit] = Fseries(linspace(-1,1,67),current_parameters,12);
yfit(1) = just_trained(1); yfit(end) = just_trained(end);
new_params = [just_trained(1:5), yfit];
add_plot = 0;
load(old_info)
percent_off_target = 1;
[error, P_on_target, P__binding_rate] = test_model(average_host_Tarasava, average_sgRNA_Tarasava,...
average_binding_data, new_params, Number_of_Nucleotides, ...
off_target_sites, integration_time, integration_accuracy,...
Cas_concentration, crRNA_concentration, on_target_copy_number, ...
off_target_copy_number, off_target_weight, ODE_parameters, ...
pam_sites, add_plot);
disp(error)
close all
% accuracy plot on real data
hold on
figure(1)
b = bar(linspace(1,length(P__binding_rate),length(P__binding_rate)),[P__binding_rate average_binding_data]);
b(2).FaceColor = 'r';
errorbar(1:30, average_binding_data, sd, '-')
title('observed and predicted binding')
xlabel('distance from current position backward')
ylabel('function evaluation')
title('True versus Predicted Bind ing within dCas9 System')
xlabel('Data Point')
ylabel('Predicted Binding')
%% look at parameter sensetivity
h = .1;
gradients = parameter_sensetivity(average_host_Tarasava, average_sgRNA_Tarasava,...
average_binding_data, max_iterations,...
use_old, Number_of_Nucleotides,...
old_info, ODE_parameters, off_target_sites,...
integration_time, integration_accuracy,...
Cas_concentration, crRNA_concentration, ...
on_target_copy_number, off_target_copy_number, pam_sites, ...
percent_off_target, off_target_weight, stopping_Tol,h);
%% now do weighted least squares
[a0,a,b,yfit] = wls_fourier_approx(linspace(-1,1,67), just_trained(6:end), 40, abs(gradients(6:end,1)));
plot(linspace(-1,1,67), yfit); hold on; plot(linspace(-1,1,67), just_trained(6:end))
new_params = [just_trained(1:5), yfit];
add_plot = 0;
load(old_info)
percent_off_target = 1;
[error, P_on_target, P__binding_rate] = test_model(average_host_Tarasava, average_sgRNA_Tarasava,...
average_binding_data, new_params, Number_of_Nucleotides, ...
off_target_sites, integration_time, integration_accuracy,...
Cas_concentration, crRNA_concentration, on_target_copy_number, ...
off_target_copy_number, off_target_weight, ODE_parameters, ...
pam_sites, add_plot);
disp(error)