all folders in place

QingyunSun · Apr 25, 2015 · c533ed1 · c533ed1
1 parent b01459c
commit c533ed1
Show file tree

Hide file tree

Showing 9 changed files with 446 additions and 2 deletions.
diff --git a/README.md b/README.md
@@ -2,6 +2,8 @@
 This set of Matlab codes reproduce the figures and experimental results published in our paper: 
 > Ju Sun, Qing Qu, John Wright. Complete Dictionary Recovery over the Sphere. 
 
-+ Folder *image_experiment_focm*: to reproduce the results in Figure 1. Run test_images.m to start. 
++ Folder **image_experiment_focm**: to reproduce the results in Figure 1. Run *test_images.m* to start. 
++ Folder **landscape**: to reproduce Figure 2 (center) and Figure 4. 
++ Folder **simulation**: to reproduce the phase transition in Figure 5. Run *single_vector_pt.m* to start. 
 
 Codes written by Ju Sun, Qing Qu, and John Wright. Questions or bug reports please send email to Ju Sun, [email protected] 
diff --git a/README.md~ b/README.md~
@@ -2,6 +2,8 @@
 This set of Matlab codes reproduce the figures and experimental results published in our paper: 
 > Ju Sun, Qing Qu, John Wright. Complete Dictionary Recovery over the Sphere. 
 
-+ Folder *image_experiment_focm*: to reproduce the results in Figure 1. Run test_images.m to start. 
++ Folder **image_experiment_focm**: to reproduce the results in Figure 1. Run *test_images.m* to start. 
++ Folder **landscape**: to reproduce Figure 2 (center) and Figure 4. 
++ Folder **simulation**: to reproduce the phase transition in Figure 5. Run *single_vector_pt.m* to start. 
 
 Codes written by Ju Sun, Qing Qu, and John Wright. Questions or bug reports please send email to Ju Sun, [email protected] 
diff --git a/landscape/plot_obj_LSE.m b/landscape/plot_obj_LSE.m
@@ -0,0 +1,113 @@
+clc;close all; clear all;
+% Reproducing the reparameterized function landscape in Fig. 2 and Fig. 4 of the paper
+% "Complete Dictionary Recovery over the Sphere",
+% by Ju Sun, Qing Qu, and John Wright.
+% Dictionary Learning (DL) Problem formulation: Y = A_0*X_0,
+% A_0: n-by-n orthogonal matrix, fixed to be A_0 = I;
+% X_0: n-by-p matrix, row independent or column sparse
+% Reparameterize q = [w ; sqrt(1-||w||^2)]', ||w||<=1,
+% plot the function landscape h_mu = mu*log cosh(q'*Y/mu) over w.
+% Code written by Ju Sun, Qing Qu and John Wright. Current version updated
+% on 04/24/2015.
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+rng(1,'twister'); % fix the seed for random number generation
+
+%% parameter settings
+tVec = 0:.02:1;
+thetaVec = 0:.02:2*pi;
+numPts = length(tVec) * length(thetaVec);
+
+mu = 1/50;% smoothing parameters
+theta = 0.9;% sparsity
+p = 100000;% number of samples
+
+% data model selection:
+% 1 = planted sparse vector model
+% 2 = DL model, independent row, i.i.d. Bernoulli-Gaussian
+% 3 = DL model, independent row, i.i.d. Bernoulli-Uniform
+% 4 = DL model, correlated row, Bernoulli-Gaussian
+% 5 = DL model, correlated row, Bernoulli-Uniform
+choice = 1;
+
+
+%% generate random data
+% generate correlation matrix
+A = 0.1*randn(3, 3);
+A(1, 1) = 1;
+A(2, 2) = 1;
+A(3, 3) = 1;
+B = A' + A;
+% generate data
+switch choice
+ case 1
+ % PSV
+% X = (2*double(rand(3,p) < .5) - 1) .* [randn(2,p); double( rand(1,p) < theta ) / sqrt(theta)];
+ X = [randn(2,p); randn(1,p) .* double( rand(1,p) < theta ) / sqrt(theta)];
+ case 2
+ X = randn(3,p) .* double( rand(3,p) < theta );
+ case 3
+ X = 0.1 * (rand(3,p)-0.5) .* double( rand(3,p) < theta );
+ case 4
+ X = 0.1*(B *randn(3,p)).* double( rand(3,p) < theta );
+ case 5
+ X = 0.1*(B *(rand(3,p)-0.5)).* double( rand(3,p) < theta );
+end
+
+fnVals = zeros(numPts,1);
+uVals = zeros(numPts,1);
+vVals = zeros(numPts,1);
+
+%% evaluate function values
+k = 1;
+for i = 1:length(tVec)
+ if mod(i,50) == 1,
+ disp(i);
+ end
+
+ for j = 1:length(thetaVec),
+ t = tVec(i);
+ theta = thetaVec(j);
+
+ u = t*cos(theta);
+ v = t*sin(theta);
+
+ q = [ u; v; sqrt(1-u^2-v^2) ];
+
+ % calculate the gradient of the objective wrt w = [u v]
+ z = q'*X;
+
+ fnVals(k) = sum( mu * log( (exp( z / mu ) + exp( -z / mu )) / 2 ) ) / p;
+
+ uVals(k) = u;
+ vVals(k) = v;
+
+ k = k + 1;
+ end
+end
+
+%% plot function landscape
+figure(1); clf; plot3( uVals, vVals, fnVals, 'b.' );
+
+hold on;
+
+% plot a circle
+thetaVec = 0:.001:(2*pi);
+
+cx = zeros(length(thetaVec),1);
+cy = zeros(length(thetaVec),1);
+cz = zeros(length(thetaVec),1);
+
+for i = 1:length(thetaVec),
+ theta = thetaVec(i);
+
+ cx(i) = cos(theta);
+ cy(i) = sin(theta);
+end
+
+cz = min( fnVals ) * ones(size(cz));
+
+[x, y, z] = ellipsoid(0,0,min(real(fnVals)),1,1,0,30);
+surf(x, y, z, 25 * ones(size(z)), 'EdgeColor', 'none', 'CDataMapping', 'direct');
+
+plot3(cx,cy,cz,'k-','LineWidth',2);
diff --git a/landscape/plot_obj_LSE.m~ b/landscape/plot_obj_LSE.m~
@@ -0,0 +1,113 @@
+clc;close all; clear all;
+% Reproducing the reparameterized function landscape in Fig. 2 and Fig. 4 of the paper
+% "Complete Dictionary Recovery over the Sphere",
+% by Ju Sun, Qing Qu, and John Wright.
+% Dictionary Learning (DL) Problem formulation: Y = A_0*X_0,
+% A_0: n-by-n orthogonal matrix, fixed to be A_0 = I;
+% X_0: n-by-p matrix, row independent or column sparse
+% Reparameterize q = [w ; sqrt(1-||w||^2)]', ||w||<=1,
+% plot the function landscape h_mu = mu*log cosh(q'*Y/mu) over w.
+% Code written by Ju Sun, Qing Qu and John Wright. Current version updated
+% on 04/24/2015.
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+rng(1,'twister'); % fix the seed for random number generation
+
+%% parameter settings
+tVec = 0:.02:1;
+thetaVec = 0:.02:2*pi;
+numPts = length(tVec) * length(thetaVec);
+
+mu = 1/50;% smoothing parameters
+theta = 0.9;% sparsity
+p = 100000;% number of samples
+
+% data model selection:
+% 1 = planted sparse vector model
+% 2 = DL model, independent row, i.i.d. Bernoulli-Gaussian
+% 3 = DL model, independent row, i.i.d. Bernoulli-Uniform
+% 4 = DL model, correlated row, Bernoulli-Gaussian
+% 5 = DL model, correlated row, Bernoulli-Uniform
+choice = 1;
+
+
+%% generate random data
+% generate correlation matrix
+A = 0.1*randn(3, 3);
+A(1, 1) = 1;
+A(2, 2) = 1;
+A(3, 3) = 1;
+B = A' + A;
+% generate data
+switch choice
+ case 1
+ % PSV
+% X = (2*double(rand(3,p) < .5) - 1) .* [randn(2,p); double( rand(1,p) < theta ) / sqrt(theta)];
+ X = [randn(2,p); randn(1,p) .* double( rand(1,p) < theta ) / sqrt(theta)];
+ case 2
+ X = randn(3,p) .* double( rand(3,p) < theta );
+ case 3
+ X = 0.1 * (rand(3,p)-0.5) .* double( rand(3,p) < theta );
+ case 4
+ X = 0.1*(B *randn(3,p)).* double( rand(3,p) < theta );
+ case 5
+ X = 0.1*(B *(rand(3,p)-0.5)).* double( rand(3,p) < theta );
+end
+
+fnVals = zeros(numPts,1);
+uVals = zeros(numPts,1);
+vVals = zeros(numPts,1);
+
+%% evaluate function values
+k = 1;
+for i = 1:length(tVec)
+ if mod(i,50) == 1,
+ disp(i);
+ end
+
+ for j = 1:length(thetaVec),
+ t = tVec(i);
+ theta = thetaVec(j);
+
+ u = t*cos(theta);
+ v = t*sin(theta);
+
+ q = [ u; v; sqrt(1-u^2-v^2) ];
+
+ % calculate the gradient of the objective wrt w = [u v]
+ z = q'*X;
+
+ fnVals(k) = sum( mu * log( (exp( z / mu ) + exp( -z / mu )) / 2 ) ) / p;
+
+ uVals(k) = u;
+ vVals(k) = v;
+
+ k = k + 1;
+ end
+end
+
+%% plot function landscape
+figure(1); clf; plot3( uVals, vVals, fnVals, 'b.' );
+
+hold on;
+
+% plot a circle
+thetaVec = 0:.001:(2*pi);
+
+cx = zeros(length(thetaVec),1);
+cy = zeros(length(thetaVec),1);
+cz = zeros(length(thetaVec),1);
+
+for i = 1:length(thetaVec),
+ theta = thetaVec(i);
+
+ cx(i) = cos(theta);
+ cy(i) = sin(theta);
+end
+
+cz = min( fnVals ) * ones(size(cz));
+
+[x, y, z] = ellipsoid(0,0,min(real(fnVals)),1,1,0,30);
+surf(x, y, z, 25 * ones(size(z)), 'EdgeColor', 'none', 'CDataMapping', 'direct');
+
+plot3(cx,cy,cz,'k-','LineWidth',2);
diff --git a/simulation/Solve_TR_Subproblem.m b/simulation/Solve_TR_Subproblem.m
@@ -0,0 +1,33 @@
+function [x,optval] = Solve_TR_Subproblem(A,b,Delta)
+
+% solve_trust_region_subproblem by SDP relaxation
+% min (1/2) z' A z + z' b s.t. ||z||_2 <= Delta
+%
+% for z \in R^n via an (exact) convex relaxation by SDP lifting
+% min < [ A b; b' 0 ], Z > s.t. Z >= 0, Z_{n+1,n+1} = 1, trace(Z) = 2.
+
+G = (1/2) * [ A, b; b', 0 ];
+n = length(b);
+
+cvx_quiet(1);
+
+cvx_begin sdp
+variable X(n+1,n+1) symmetric;
+minimize(trace(G'*X))
+subject to
+trace(X) <= 1+Delta^2;
+X(n+1,n+1) == 1;
+X >= 0;
+cvx_end
+
+% recover the optimal direction by the leading eigenvector using SVD 
+[u,~,~] = svds(X,1);
+x = u(1:n);
+
+optval = cvx_optval;
+nx = - x;
+if (1/2) * nx'*A*nx + b'*nx < (1/2) * x'* A * x + b' * x
+ x = nx;
+end
+
+end
diff --git a/simulation/TR_Sphere.m b/simulation/TR_Sphere.m
@@ -0,0 +1,75 @@
+% TR_sphere.m: trust region method over the sphere for recovering a single
+% sparse vector.
+% Recover one sparsest row of Y by solving:
+% min_q h_mu(q' * Y), s.t. ||q|| =1.
+function q = TR_Sphere( Y, mu, q_init )
+[n,~] = size(Y);
+% initalize q
+if nargin > 2,
+ q = q_init;
+else
+ q = randn(n,1);% random initialization
+ q = q / norm(q);
+end
+
+%% Parameter Settings
+
+tol = 1e-6;% stopping criteria for the gradient
+MaxIter = 200;% max iteration
+Delta = 0.1;% inital trust region size
+Delta_max = 1;% maximum trust region size
+Delta_min = eps;% minimum trust region size
+
+% parameters for adjusting trust region size
+eta_s = 0.1;
+eta_vs = 0.9;
+gamma_i = 2;
+gamma_d = 1/2;
+
+
+%% Main Iteration
+for iter = 1:MaxIter
+ U = null(q'); % basis of the tangent space T_q = { v| v'*q =0}
+ [f,g,H] = l1_exp_approx(Y,q,mu,true); % evaluate the function, gradient and hessian at q
+
+ %solve TR-subprolbem
+ A = U' * H * U; % hessian in the tangent space T_q
+ b = U' * g; % gradient in the tangent space T_q
+ [xi,opt] = Solve_TR_Subproblem(A,b,Delta); % optimal direction xi in the tangent space T_q
+
+ t = norm(xi);
+ q_hat = q * cos(t) + (U*xi/t) * sin(t);% retraction back to the sphere by exponential map
+
+ % evaluate the trust region ratio: rho
+ f_new = l1_exp_approx(Y,q_hat,mu,false);
+ f_hat = f + b' * xi + (1/2) * xi' * A * xi;
+ rho = (f - f_new)/(f-f_hat);
+
+ % update iterate q and trust-region size Delta
+ flag = 0;
+ if rho>eta_vs
+ q = q_hat; flag = 1;
+ Delta = min(gamma_i*Delta,Delta_max);
+ elseif rho>eta_s
+ q = q_hat; flag = 1;
+ else
+ Delta = max(gamma_d*Delta,Delta_min);
+ end
+ [~,idx] = max(abs(q));
+ ei = zeros(n,1);
+ ei(idx) = sign(q(idx));
+ err = min(norm( q - ei ),norm(q +ei));
+ fprintf('Iter = %d, Obj = %f, Err = %f, TR Size = %f, Step = %f, rho = %f \n',iter,f,err, Delta,t,rho);
+
+ % this is cheating for speed; should be removed for reproduction 
+ if err<mu/2
+ break;
+ end
+
+ if (flag ==1&&abs((f_new - f)/t)<tol);
+ break;
+ end
+
+end
+
+end
diff --git a/simulation/l1_exp_approx.m b/simulation/l1_exp_approx.m
@@ -0,0 +1,21 @@
+% evaluate the value f, gradient g and hessian H 
+% for the function h_mu(q'*Y) 
+function [f,g,H] = l1_exp_approx(Y,q,mu,flag)
+f=[];g=[];H =[];
+z = q'*Y;
+t = z/mu;
+ind = abs(t)>50;%for the purpose of preventing overflow
+n = size(Y,1);
+
+f_each_1 = mu * log( cosh(t(~ind)) );
+f_each_2 = abs(z(ind)) - mu*log(2);
+f = sum(f_each_1)+sum(f_each_2); %function value
+
+if(flag == true) 
+ g_each = tanh(t);
+ H_each = (1/mu) * (1-g_each.^2); 
+ g = Y * g_each';% gradient 
+ H = (Y.* repmat(H_each, n, 1))*Y'; %hessian H = Y * diag(H_each) * Y
+end
+
+end
diff --git a/simulation/p=5n^3.fig b/simulation/p=5n^3.fig