Skip to content

Commit

Permalink
Fixed unnoticed bug in PCA (Thanks to https://github.com/wihoho for r…
Browse files Browse the repository at this point in the history
…eporting!). Added Eigenvalues to the model, if someone needs it for evaluation. LDA now returns only the chosen number of components (although you often want all).
  • Loading branch information
Philipp Wagner committed Mar 7, 2013
1 parent b634d4d commit 5fd29d9
Show file tree
Hide file tree
Showing 4 changed files with 150 additions and 147 deletions.
61 changes: 30 additions & 31 deletions m/models/eigenfaces.m
Original file line number Diff line number Diff line change
@@ -1,35 +1,34 @@
function model = eigenfaces(X, y, num_components)
%% Performs a PCA on X and stores num_components principal components.
%%
%% Args:
%% X [dim x num_data] input data
%% y [1 x num_data] classes
%% num_components [int] components to keep
%%
%% Out:
%% model [struct] learned model
%% .name [char] name
%% .mu [dim x 1] sample mean of X
%% .num_components [int] number of components in this model
%% .W [dim x num_components] components identified by PCA
%% .P [num_components x num_data] projection of X
%%
%% Example:
%% m_eigenface = eigenfaces(X, y, 100)
if(nargin < 3)
num_components=size(X,2)-1;
end

% perform pca
Pca = pca(X, num_components);

%% Performs a PCA on X and stores num_components principal components.
%%
%% Args:
%% X [dim x num_data] input data
%% y [1 x num_data] classes
%% num_components [int] components to keep
%%
%% Out:
%% model [struct] learned model
%% .name [char] name
%% .mu [dim x 1] sample mean of X
%% .num_components [int] number of components in this model
%% .W [dim x num_components] components identified by PCA
%% .P [num_components x num_data] projection of X
%%
%% Example:
%% m_eigenface = eigenfaces(X, y, 100)
if(nargin < 3)
num_components=size(X,2)-1;
end
% perform pca
Pca = pca(X, num_components);
% build model
model.name = 'eigenfaces';
model.W = Pca.W;
model.num_components = num_components;
model.mu = Pca.mu;
% project data
model.P = model.W'*(X - repmat(Pca.mu, 1, size(X,2)));
% store classes
model.y = y;
model.D = Pca.D;
model.W = Pca.W;
model.num_components = num_components;
model.mu = Pca.mu;
% project data
model.P = model.W'*(X - repmat(Pca.mu, 1, size(X,2)));
% store classes
model.y = y;
end
78 changes: 40 additions & 38 deletions m/models/fisherfaces.m
Original file line number Diff line number Diff line change
@@ -1,40 +1,42 @@
function model = fisherfaces(X, y, num_components)
%% Fisherfaces (see Python version for description)
%%
%% Args:
%% X [dim x num_data] input data
%% y [1 x num_data] classes
%% num_components [int] components to keep
%%
%% Out:
%% model [struct] learned model
%% .name [char] name
%% .mu [dim x 1] sample mean of X
%% .num_components [int] number of components in this model
%% .W [dim x num_components] components identified by LDA
%% .P [num_components x num_data] projection of X
%%
%% Example:
%% m_fisherface = fisherface(X, y)

N = size(X,2);
c = max(y);

% set the num_components
if(nargin==2)
num_components=c-1;
end
num_components = min(c-1,num_components);

% reduce dim(X) to (N-c) (see paper [BHK1997])
Pca = pca(X,y,(N-c));
Lda = lda(project(X, Pca.W, Pca.mu), y, num_components);

% build model
model.name = 'lda';
model.mu = repmat(0, size(X,1), 1);
model.W = Pca.W*Lda.W;
model.P = model.W'*X;
model.num_components = Lda.num_components;
model.y = y;
%% Fisherfaces (see Python version for description)
%%
%% Args:
%% X [dim x num_data] input data
%% y [1 x num_data] classes
%% num_components [int] components to keep
%%
%% Out:
%% model [struct] learned model
%% .name [char] name
%% .mu [dim x 1] sample mean of X
%% .num_components [int] number of components in this model
%% .W [dim x num_components] components identified by LDA
%% .P [num_components x num_data] projection of X
%%
%% Example:
%% m_fisherface = fisherface(X, y)

N = size(X,2);
c = max(y);

% set the num_components
if(nargin==2)
num_components=c-1;
end

num_components = min(c-1, num_components);

% reduce dim(X) to (N-c) (see paper [BHK1997])
Pca = pca(X, y, (N-c));
Lda = lda(project(X, Pca.W, Pca.mu), y, num_components);

% build model
model.name = 'lda';
model.mu = repmat(0, size(X,1), 1);
model.D = Lda.D;
model.W = Pca.W*Lda.W;
model.P = model.W'*X;
model.num_components = Lda.num_components;
model.y = y;
end
105 changes: 53 additions & 52 deletions m/models/lda.m
Original file line number Diff line number Diff line change
@@ -1,55 +1,56 @@
function model = lda(X, y, num_components)
%% Performs a Linear Discriminant Analysis and returns the
%% num_components components sorted descending by their
%% eigenvalue.
%%
%% num_components is bound to the number of classes, hence
%% num_components = min(c-1, num_components)
%%
%% Args:
%% X [dim x num_data] input data
%% y [1 x num_data] classes
%% num_components [int] number of components to keep
%%
%% Returns:
%% model [struct] Represents the learned model.
%% .name [char] name of the model
%% .num_components [int] number of components in this model
%% .W [array] components identified by LDA
%%
dim = size(X,1);
c = max(y);
if(nargin==2)
num_components = c - 1
end
num_components = min(c-1,num_components);
meanTotal = mean(X,2);
Sw = zeros(dim, dim);
Sb = zeros(dim, dim);
for i=1:c
Xi = X(:,find(y==i));
meanClass = mean(Xi,2);
% center data
Xi = Xi - repmat(meanClass, 1, size(Xi,2));
% calculate within-class scatter
Sw = Sw + Xi*Xi';
% calculate between-class scatter
Sb = Sb + size(Xi,2)*(meanClass-meanTotal)*(meanClass-meanTotal)';
end
%% Performs a Linear Discriminant Analysis and returns the
%% num_components components sorted descending by their
%% eigenvalue.
%%
%% num_components is bound to the number of classes, hence
%% num_components = min(c-1, num_components)
%%
%% Args:
%% X [dim x num_data] input data
%% y [1 x num_data] classes
%% num_components [int] number of components to keep
%%
%% Returns:
%% model [struct] Represents the learned model.
%% .name [char] name of the model
%% .num_components [int] number of components in this model
%% .W [array] components identified by LDA
%%
dim = size(X,1);
c = max(y);
if(nargin < 3)
num_components = c - 1
end
num_components = min(c-1,num_components);
meanTotal = mean(X,2);
Sw = zeros(dim, dim);
Sb = zeros(dim, dim);
for i=1:c
Xi = X(:,find(y==i));
meanClass = mean(Xi,2);
% center data
Xi = Xi - repmat(meanClass, 1, size(Xi,2));
% calculate within-class scatter
Sw = Sw + Xi*Xi';
% calculate between-class scatter
Sb = Sb + size(Xi,2)*(meanClass-meanTotal)*(meanClass-meanTotal)';
end

% solve the eigenvalue problem
[V, D] = eig(Sb,Sw);

% sort eigenvectors descending by eigenvalue
[D,idx] = sort(diag(D),1,'descend');
V = V(:,idx);

% build model
model.name = 'lda';
model.num_components = num_components;
model.W = V(:,1:(c-1));
% solve the eigenvalue problem
[V, D] = eig(Sb,Sw);

% sort eigenvectors descending by eigenvalue
[D,idx] = sort(diag(D),1,'descend');
V = V(:,idx);

% build model
model.name = 'lda';
model.num_components = num_components;
model.D = D(1:num_components);
model.W = V(:,1:num_components);
end
53 changes: 27 additions & 26 deletions m/models/pca.m
Original file line number Diff line number Diff line change
@@ -1,33 +1,34 @@
function model = pca(X, y, num_components)
%% Performs a PCA on X and stores num_components principal components.
%%
%% Args:
%% X [dim x num_data] Input
%% y [1 x num_data] Classes
%% num_components [int] Number of components to use.
%%
%% Out:
%% model [struct] learned model
%% .name [char] name of this model
%% .W [dim x num_components] components identified by PCA
%% .num_components [int] mumber of components used in this model.
%% .mu [dim x 1] sample mean of X
%%
%% Example:
%% pca(X, y, struct('num_components',100))
%%
if(nargin < 3)
num_components=size(X,2)-1;
end
% center data
function model = pca(X, num_components)
%% Performs a PCA on X and stores num_components principal components.
%%
%% Args:
%% X [dim x num_data] Input
%% y [1 x num_data] Classes
%% num_components [int] Number of components to use.
%%
%% Out:
%% model [struct] learned model
%% .name [char] name of this model
%% .W [dim x num_components] components identified by PCA
%% .num_components [int] mumber of components used in this model.
%% .mu [dim x 1] sample mean of X
%%
%% Example:
%% pca(X, y, struct('num_components',100))
%%
if(nargin < 2)
num_components=size(X,2)-1;
end
% center data
mu = mean(X,2);
X = X - repmat(mu, 1, size(X,2));
% svd on centered data == pca
[E,D,V] = svd(X ,'econ');

% build model
model.name = 'pca';
model.W = E(:,1:num_components);
model.num_components = num_components;
model.mu = mu;
model.D = diag(D).^2;
model.D = model.D(1:num_components);
model.W = E(:,1:num_components);
model.num_components = num_components;
model.mu = mu;
end

0 comments on commit 5fd29d9

Please sign in to comment.