-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSimplest_ABC.m
302 lines (269 loc) · 10.1 KB
/
Simplest_ABC.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
function [abc_theta,abc_weights] = Simplest_ABC
%created 13/5/15
%last edit 17/5/15
%AS simple as ABC
%implements a version of ABC via rejection sampling to fit parameters to
%some 'data' which is first simulated from the model
%Model used is velocity jump process with 1 mode
%see Turner 2012 for ABC population monte carlo tutorial
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
tic;
rng(22);
close all
%Fake parameters
params.nu1 = 1.16;
params.nu2 = 0;
params.lambda = 0.42;
params.omega = 0;
params.phi = 0.58;
params.theta_0 = 0;
params.x_0=0.5;
real_params = [params.nu1, params.nu2, params.lambda, params.omega, params.phi, params.theta_0, params.x_0];
%Generate fake data
%calculate appropriate summary statistic - we choose MFPT
[mfpt_fake_data,lambda_summary_fake,nu_summary_fake] = mfpt_calculator(params,100,0,1,65)
%Choose tolerance sequence
num_generations = 3;
%delta values chosen by looking at the standard deviations of the summary
%statistics as parameters vary
delta = [400,200,100,50;
1000,500,250,125;
0.01,0.005,0.001,0.0005];
%At t=1 for first generation
N=500; %N particles at each generation
%create while loop
%set prior
prior_params = [1.16, 0, 0.42, 0, 0.58, 0, 0.5];
prior_sigma = [0.4, 0.4, 0.4]; %sd of gaussian or spread around mean of uniform
p_indices = [1, 3, 5];
n=0;
abc_theta = zeros(N,length(p_indices));
abc_weights = zeros(N,1);
fprintf('Generation 1 begins\n');
for i=1:N
mfpt_candidate_data = mfpt_fake_data + delta(1,1) + 1; %initialise greater than tolerance
lambda_summary_candidate = lambda_summary_fake + delta(2,1) + 1;
nu_summary_candidate = nu_summary_fake + delta(3,1) + 1;
while Summary_distance(mfpt_fake_data,mfpt_candidate_data, lambda_summary_fake, ...
lambda_summary_candidate, nu_summary_fake, nu_summary_candidate) > delta(:,1) %compare the summary statistic to that from original data
n=n+1;
%Gaussian prior
%rr = randn(1,3);
%Uniform prior
rr = rand(1,3);
%simulate parameters from the prior
% candidate.nu1 = 0.4 + 0.1*rr(1,j);
% candidate.nu2 = 0.08 + 0.02*rr(2,j);
% candidate.lambda = 7.7; % + 2*randn(1);
% candidate.omega = 1/6; %and this %+ 0.5*randn(1);
% candidate.phi = 0.58; %fix this too %+0.2*randn(1);
% candidate.theta_0 = 0; %these are fixed
% candidate.x_0 = 0.5; %these are fixed
par_params = prior_params;
par_params(p_indices) = prior_params(p_indices)+prior_sigma.*(rr-0.5);
%simulate data using these model parameters
%Calculate summary statistic (MFPT)
[mfpt_candidate_data,lambda_summary_candidate,nu_summary_candidate] = mfpt_calculator(par_params,20,1,0,4);
if n>10^5
fprintf('Thats more than enough parameters to look at for now.\n')
break
end
end %repeat until N acceptances have been made
abc_theta(i,:) = par_params(p_indices);
abc_weights(i) = 1/N;
end
sigma = 2*var(abc_theta);
figure(1);
subplot(3,1,1);
plot(abc_theta(:,1),abc_theta(:,2),'o');
hold all
grid on
plot(real_params(p_indices(1)),real_params(p_indices(2)),'rx','MarkerSize',12);
set(gca, 'fontsize',14);
xlabel('param1');
ylabel('param2');
subplot(3,1,2);
plot(abc_theta(:,1),abc_theta(:,3),'o');
hold all
grid on
plot(real_params(p_indices(1)),real_params(p_indices(3)),'rx','MarkerSize',12);
set(gca, 'fontsize',14);
xlabel('param1');
ylabel('param3');
subplot(3,1,3);
plot(abc_theta(:,2),abc_theta(:,3),'o');
hold all
grid on
plot(real_params(p_indices(2)),real_params(p_indices(3)),'rx','MarkerSize',12);
set(gca, 'fontsize',14);
xlabel('param2');
ylabel('param3');
%First generation for t=1 is done
%Now loop over generations
for tau=2:num_generations
%store previous theta
theta_store = abc_theta;
weights_store = abc_weights;
par_params = repmat(prior_params,N,1);
par_params(:,p_indices) = theta_store;
n=0;
tau
for i=1:N
mfpt_candidate_data = mfpt_fake_data+delta(1,tau)+1; %initialise greater than tolerance
lambda_summary_candidate = lambda_summary_fake + delta(2,tau) + 1;
nu_summary_candidate = nu_summary_fake + delta(3,tau) + 1;
while Summary_distance(mfpt_fake_data,mfpt_candidate_data, lambda_summary_fake, ...
lambda_summary_candidate, nu_summary_fake, nu_summary_candidate)>delta(:,tau) %compare the summary statistic to that from original data
n=n+1;
%simulate parameters from the prior
% candidate.nu1 = 0.4 + 0.1*rr(1,j);
% candidate.nu2 = 0.08 + 0.02*rr(2,j);
% candidate.lambda = 7.7; % + 2*randn(1);
% candidate.omega = 1/6; %and this %+ 0.5*randn(1);
% candidate.phi = 0.58; %fix this too %+0.2*randn(1);
% candidate.theta_0 = 0; %these are fixed
% candidate.x_0 = 0.5; %these are fixed
%sample params from previous iteration
u = rand(1);
my_index = 1;
%draw from discrete distribution with weights abc_weights
while cumsum(weights_store(1:my_index))/sum(weights_store)<u
my_index = my_index+1;
end
%peturb previous parameters
par_params(my_index,p_indices) = max(par_params(my_index,p_indices) + sigma.*randn(1,length(p_indices)),0); %if negative parameters are proposed, then just use zero
%simulate data using these model parameters
%Calculate summary statistic (MFPT)
[mfpt_candidate_data,lambda_summary_candidate,nu_summary_candidate] = mfpt_calculator(par_params(my_index,:),20,1,0,4);
if n>10^5
fprintf('Thats more than enough parameters to look at for now.\n')
break
end
end %repeat until N acceptances have been made
abc_theta(i,:) = par_params(my_index,p_indices);
%Gaussian prior
% prior = exp(-((abc_theta(i,1)-prior_params(p_indices(1))^2)/(2*prior_sigma(1)^2)))*...
% exp(-((abc_theta(i,2)-prior_params(p_indices(2)))^2)/(2*prior_sigma(2)^2))*...
% exp(-((abc_theta(i,3)-prior_params(p_indices(3)))^2)/(2*prior_sigma(3)^2));
%Uniform prior
prior = 1;
for jj=1:3
prior = prior.*(abc_theta(i,jj)>prior_params(p_indices(jj))-0.5*prior_sigma(jj)).*(abc_theta(i,jj)<prior_params(p_indices(jj))+0.5*prior_sigma(jj))./(prior_sigma(jj));
end
%fixed dependence on current weights so we have dependence on previous weights
abc_weights(i) = prior./(sum(weights_store.*exp(-((abc_theta(:,1)-theta_store(:,1)).^2)/(2*sigma(1)^2))...
.*exp(-((abc_theta(:,2)-theta_store(:,2)).^2)/(2*sigma(2)^2))...
.*exp(-((abc_theta(:,3)-theta_store(:,3)).^2)/(2*sigma(3)^2)))/(sigma(1)*sigma(2)*sigma(3))^2);
if isnan(abc_weights)
abc_weights
error('weights are NAN due to division by 0 in calculation of weights. Oops.');
end
end
sigma = 2*var(abc_theta);
abc_theta
figure(1);
subplot(3,1,1);
plot(abc_theta(:,1),abc_theta(:,2),'o');
hold all
grid on
plot(real_params(p_indices(1)),real_params(p_indices(2)),'rx','MarkerSize',12);
set(gca, 'fontsize',14);
xlabel('param1');
ylabel('param2');
subplot(3,1,2);
plot(abc_theta(:,1),abc_theta(:,3),'o');
hold all
grid on
plot(real_params(p_indices(1)),real_params(p_indices(3)),'rx','MarkerSize',12);
set(gca, 'fontsize',14);
xlabel('param1');
ylabel('param3');
subplot(3,1,3);
plot(abc_theta(:,2),abc_theta(:,3),'o');
hold all
grid on
plot(real_params(p_indices(2)),real_params(p_indices(3)),'rx','MarkerSize',12);
set(gca, 'fontsize',14);
xlabel('param2');
ylabel('param3');
end
length(abc_theta)
%get rid of values outside of support of prior
abc_theta = abc_theta((abc_weights>0),:); %if weight is 0 then get rid of that parameter
length(abc_theta)
figure(2);
subplot(3,1,1);
plot(abc_theta(:,1),abc_theta(:,2),'o');
hold all
grid on
plot(real_params(p_indices(1)),real_params(p_indices(2)),'rx','MarkerSize',12);
set(gca, 'fontsize',14);
xlabel('param1');
ylabel('param2');
subplot(3,1,2);
plot(abc_theta(:,1),abc_theta(:,3),'o');
hold all
grid on
plot(real_params(p_indices(1)),real_params(p_indices(3)),'rx','MarkerSize',12);
set(gca, 'fontsize',14);
xlabel('param1');
ylabel('param3');
subplot(3,1,3);
plot(abc_theta(:,2),abc_theta(:,3),'o');
hold all
grid on
plot(real_params(p_indices(2)),real_params(p_indices(3)),'rx','MarkerSize',12);
set(gca, 'fontsize',14);
xlabel('param2');
ylabel('param3');
fname = sprintf('Simplest_ABC_output2.txt');
fileID = fopen(fname,'w');
fprintf(fileID,'%f \n',abc_theta);
fclose('all');
toc
end
function [mean_fp_time, mean_num_jumps, mean_jump_distances] = mfpt_calculator(par_params,num_particles,is_parallel, non_infinite, r_random)
%runs velocityjump2D_ModesInput which is a velocity jump process for a single particle
%runs this many times for many particles and returns simulated estimates of
%mfpt
if is_parallel
params.nu1 = par_params(1);
params.nu2 = par_params(2);
params.lambda = par_params(3);
params.omega = par_params(4);
params.phi = par_params(5);
params.theta_0 = par_params(6);
params.x_0 = par_params(7);
else
params = par_params;
end
t_max = 1; %if this is too small, some runs will not reach anchoring giving infinite mfpt _> rejection.
% these are probably not an issue as they would give large mfpt anyway,
% which would be rejected. But can increase this.
anchor_times = zeros(num_particles,1);
num_jumps = zeros(num_particles,1);
jump_distances = zeros(num_particles,1);
parfor j=1:num_particles
[~, anchor_times(j), ~, ~, pathx, ~] = velocityjump2D_ModesInput(t_max, params, 1, 0, 1, 0, r_random*j);
num_jumps(j) = length(pathx);
jump_distances(j) = median(abs(diff(pathx)));
end
mean_fp_time = mean(anchor_times); % use mean first passge time as one summary statistic
mean_num_jumps = mean(num_jumps); %use mean number of jumps in a single passge as another
mean_jump_distances = mean(jump_distances); %use mean jump distance as final summary statistic
if non_infinite
while isinf(mean_fp_time)
t_max = 2*t_max;
parfor j=1:num_particles
[~, anchor_times(j), ~, ~, pathx, ~] = velocityjump2D_ModesInput(t_max, params, 1, 0, 1, 0, r_random*j);
num_jumps(j) = length(pathx);
end
mean_fp_time = mean(anchor_times);
mean_num_jumps = mean(num_jumps);
end
end
% sd_anchor_time = std(anchor_times);
end
function dist = Summary_distance(mfpt_fake_data,mfpt_candidate_data, lambda_summary_fake, lambda_summary_candidate, nu_summary_fake, nu_summary_candidate)
dist = [abs(mfpt_fake_data - mfpt_candidate_data); abs(lambda_summary_fake - lambda_summary_candidate); abs(nu_summary_fake - nu_summary_candidate)];
end