%function synthetic_2nd_order_censored
% demo script for GP regression with additive kernels (adapted from an HKL
% regression demo) on a synthetic second-order target with censored data
clear all
%addpath(genpath(pwd))
% fix the seed of the random generators for reproducibility (optional)
%seed=0;
%randn('state',seed);
%rand('state',seed);
bigp = 4; % log2 of the total number of variables
% toy example characteristics
p = 2^bigp; % total number of variables (used to generate a Wishart distribution)
psub = 3; % number of variables kept = dimension of the problem
n = 100; % number of observations
s = 3; % number of relevant variables
noise_std = .02; % standard deviation of noise
proptrain = .5; % proportion of data kept for training (only used by the disabled random split below)
% generate random covariance matrix from a Wishart distribution
Sigma_sqrt = randn(p,p);
Sigma = Sigma_sqrt' * Sigma_sqrt;
% normalize to unit diagonal (correlation matrix) and sample
diagonal = diag(Sigma);
Sigma = diag( 1./diagonal.^.5) * Sigma * diag( 1./diagonal.^.5);
Sigma_sqrt = Sigma_sqrt * diag( 1./diagonal.^.5);
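% Rescaling Sigma_sqrt by the same diagonal factor keeps it a valid square
% root: if Sigma = S'*S and D = diag(diag(Sigma)), then
%   (S*D^(-1/2))' * (S*D^(-1/2)) = D^(-1/2) * Sigma * D^(-1/2).
% Optional sanity check:
%   assert( max(abs(diag(Sigma) - 1)) < 1e-10 )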
% correlated sampling is disabled: multiply by Sigma_sqrt to re-enable it
X = randn(n,p);% * Sigma_sqrt;
X = X(:,1:psub); % keep only the first psub variables
p = psub;
% generate a nonlinear function of X as a sum of second-order
% (pairwise) interactions between the relevant variables
J = 1:s; % select the first s variables
Y = zeros(n,1);
% first-order alternative (kept for reference; known to work):
%for i=1:s
%    Y = Y + sin(X(:,J(i)));
%end
for i=1:s
    for j=1:i-1
        Y = Y + sin(sin(X(:,J(i)) .* sin(X(:,J(j)))));
    end
end
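% The target therefore contains only pairwise terms of the form
%   f_ij(x) = sin( sin(x_i .* sin(x_j)) )   for i > j,
% i.e. interactions of order exactly two, which motivates the additive
% kernel with interaction orders 1..R used below.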
% normalize to unit standard deviation
Y = Y / std(Y);
% add some noise with known standard deviation
Y = Y + randn(n,1) * noise_std;
% random train/test split (disabled; the censored split below is used instead)
%ntrain = round(n*proptrain);
%ntest = n - ntrain;
%rp = randperm(n);
%trainset = rp(1:ntrain);
%testset = rp(ntrain+1:end);
% censored split: train only where at least one of the first three
% coordinates is negative
trainset = X(:,1) < 0 | X(:,2) < 0 | X(:,3) < 0;
testset = ~trainset;
test_size = sum(testset); % testset is a logical mask, so count its true entries
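% Note: the test points lie entirely in the octant where the first three
% coordinates are all nonnegative (a region never seen during training),
% so the test error below measures extrapolation rather than interpolation.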
X_all = X;
y_all = Y;
% split into training and test
X_test = X_all(testset,:);
y_test = y_all(testset);
X = X_all(trainset,:);
y = y_all(trainset);
[N,D] = size(X);
%plot3( X(:,1), X(:,2), y, '.' )
% (disabled) note kept from an earlier variant: instead of restarting all
% the covariance parameters, just keep the old set and add one new
% parameter, also initialized to its previous value.
%hyp.cov = [hyp.cov hyp.cov(end)];
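% The calls below (likGauss, infExact, meanConst, covADD, covSEard, minimize,
% gp) follow the conventions of the GPML toolbox: hyperparameters are
% collected in a struct hyp with fields .lik, .mean and .cov, and positive
% quantities (noise std, lengthscales, signal std) are stored on a log scale.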
likfunc = 'likGauss'; sn = 0.1; hyp.lik = log(sn); % Gaussian likelihood, initial noise std 0.1
inference = @infExact; % exact GP inference
meanfunc = {'meanConst'}; hyp.mean = 0; % constant mean, initialized to 0
R = 3; % maximum interaction order for the additive kernel
covfunc = { 'covADD',{1:R,'covSEiso'} }; % additive kernel with interaction orders 1..R
hyp.cov = [ log(ones(1,2*D)), log(ones(1,R)) ]; % set hyperparameters (log(1) = 0)
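% covADD hyperparameter layout: first the base-kernel parameters for each of
% the D input dimensions (covSEiso has two: log lengthscale and log signal
% std, giving 2*D entries), followed by one log output scale per interaction
% order, R in total. Since log(1) = 0, all parameters start at 1.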
evalc('hyp = minimize(hyp, @gp, -100, inference, meanfunc, covfunc, likfunc, X, y);'); % optimize hyperparameters (up to 100 function evaluations); evalc suppresses the printout
[predictions, ~, ~, ~, lp] = gp(hyp, inference, meanfunc, covfunc, likfunc, X, y, X_test, y_test);
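% With test targets y_test supplied, gp returns the predictive means as its
% first output and the log predictive probabilities of y_test as its fifth
% (captured here in lp).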
hyp_add = hyp; % keep the fitted additive-model hyperparameters
[lengthscales, variances, scaled_order_variances] = sort_additive_hypers( D, hyp.cov ) % local helper (not part of GPML); left unsuppressed so it prints
error_add = mean((predictions - y_test).^2) % test mean squared error
logprob_add = mean(lp) % mean log predictive probability
covfunc = { 'covSEard' }; % baseline: squared-exponential kernel with ARD
hyp.cov = [ log(ones(1,D)), log(1) ]; % set hyperparameters (log(1) = 0)
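% covSEard expects D log lengthscales (one per input dimension) followed by
% a single log signal std; as above, log(1) = 0 initializes everything to 1.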
evalc('hyp = minimize(hyp, @gp, -100, inference, meanfunc, covfunc, likfunc, X, y);'); % optimize hyperparameters; evalc suppresses the printout
[predictions, ~, ~, ~, lp] = gp(hyp, inference, meanfunc, covfunc, likfunc, X, y, X_test, y_test);
exp(hyp.cov) % learned lengthscales and signal std (printed)
error_ard = mean((predictions - y_test).^2) % test mean squared error ('error' would shadow the built-in)
logprob_ard = mean(lp) % mean log predictive probability
%end