Skip to content
This repository has been archived by the owner on Feb 22, 2023. It is now read-only.

Commit

Permalink
Add quick retrain script.
Browse files Browse the repository at this point in the history
  • Loading branch information
Milind Ganjoo committed May 31, 2013
1 parent 12cdc94 commit 0666d51
Show file tree
Hide file tree
Showing 2 changed files with 189 additions and 1 deletion.
12 changes: 11 additions & 1 deletion doOutlierDetection.m
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
function [ sortedOutlierIdxs ] = doOutlierDetection(method, XmapTrain, YmapTrain, XoutlierTrain, theta, trainParams, wordTable, params, zeroCategories)
function [ sortedOutlierIdxs, returnedParams ] = doOutlierDetection(method, XmapTrain, YmapTrain, XoutlierTrain, theta, trainParams, wordTable, params, zeroCategories)

numCategories = size(wordTable, 2);
nonZeroCategories = setdiff(1:numCategories, zeroCategories);
Expand Down Expand Up @@ -51,18 +51,28 @@
disp('Training Gaussian classifier using Mixture of Gaussians');
[mu, sigma, priors] = trainGaussianDiscriminant(mappedTrainImages1, YmapTrain1, numCategories, wordTable1);
[~, sortedOutlierIdxs] = sort(predictGaussianDiscriminant(mappedOutlierImages1, mu, sigma, priors, zeroCategories));
returnedParams.mu = mu;
returnedParams.sigma = sigma;
returnedParams.priors = priors;
elseif strcmp(method, 'gaussianPdf')
% Train Gaussian classifier
disp('Training Gaussian classifier using Mixture of Gaussians PDF');
[mu, sigma, priors] = trainGaussianDiscriminant(mappedTrainImages1, YmapTrain1, numCategories, wordTable1);
[~, sortedOutlierIdxs] = sort(predictGaussianDiscriminantMin(mappedOutlierImages1, mu, sigma, zeroCategories));
returnedParams.mu = mu;
returnedParams.sigma = sigma;
returnedParams.priors = priors;
elseif strcmp(method, 'loop')
disp('Training LoOP model');
knn = 20;
bestLambdas = [13, 10, 13, 12, 10, 10, 13, 10];
% bestLambdas = randi(4, 1, length(nonZeroCategories)) + 8;
[ nplofAll, pdistAll ] = trainOutlierPriors(mappedTrainImages1, YmapTrain1, nonZeroCategories, size(topNeighborsTrain, 2), knn, bestLambdas);
[~, sortedOutlierIdxs] = sort(calcOutlierPriors(mappedOutlierImages1, mappedTrainImages1, YmapTrain1, size(topNeighborsTrain, 2), nonZeroCategories, bestLambdas, knn, nplofAll, pdistAll ), 'descend');
returnedParams.nplofAll = nplofAll;
returnedParams.pdistAll = pdistAll;
returnedParams.knn = 20;
returnedParams.bestLambdas = bestLambdas;
end

end
Expand Down
178 changes: 178 additions & 0 deletions traintestGaussianHack.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@
addpath anomalyFunctions/;
addpath toolbox/;
addpath toolbox/minFunc/;
addpath toolbox/pwmetric/;
addpath costFunctions/;

fields = {{'dataset', 'animals'};
{'wordset', 'acl'};
{'resolution', 11};
};

% Load existing model parameters, if they exist
for i = 1:length(fields)
if exist('fullParams','var') && isfield(fullParams,fields{i}{1})
disp(['Using the previously defined parameter ' fields{i}{1}])
else
fullParams.(fields{i}{1}) = fields{i}{2};
end
end

loadDataRetrain;

disp('Training mapping function');
% Train mapping function
trainParams.imageDataset = fullParams.dataset;
[theta, trainParams ] = fastTrain(XmapTrain, YmapTrain, trainParams, wordTable);
save(sprintf('%s/theta.mat', outputPath), 'theta', 'trainParams');
% Get train accuracy
mapDoEvaluate(XmapTrain, YmapTrain, label_names, label_names, wordTable, theta, trainParams, true);

disp('Training seen softmax features');
mappedCategories = zeros(1, numCategories);
mappedCategories(nonZeroCategories) = 1:numCategories-length(zeroCategories);
trainParamsSeen.nonZeroShotCategories = nonZeroCategories;
[thetaSeen, trainParamsSeen] = nonZeroShotTrain(XmapTrain, mappedCategories(YmapTrain), trainParamsSeen, label_names(nonZeroCategories));
save(sprintf('%s/thetaSeenSoftmax.mat', outputPath), 'thetaSeen', 'trainParamsSeen');
% Get train accuracy
softmaxDoEvaluate( XmapTrain, YmapTrain, label_names, thetaSeen, trainParamsSeen, true );

disp('Training unseen softmax features');
trainParamsUnseen.zeroShotCategories = zeroCategories;
trainParamsUnseen.imageDataset = fullParams.dataset;
trainParamsUnseen.wordDataset = fullParams.wordset;
[thetaUnseen, trainParamsUnseen] = zeroShotTrain(trainParamsUnseen);
save(sprintf('%s/thetaUnseenSoftmax.mat', outputPath), 'thetaUnseen', 'trainParamsUnseen');

disp('Training Gaussian classifier using Mixture of Gaussians');
% Train Gaussian classifier
mapped = mapDoMap(XmapTrain, theta, trainParams);
% [mu, sigma, priors] = trainGaussianDiscriminant(mapped, Y, numCategories, wordTable);

pp = struct('outlierOriginalSpace', false, 'topN', 140);
[~, outlierParams] = doOutlierDetection('loop', XmapTrain, YmapTrain, XoutlierTrain, theta, trainParams, wordTable, pp, zeroCategories);

sortedLogprobabilities = sort(predictGaussianDiscriminant(mapped, outlierParams.mu, outlierParams.sigma, outlierParams.priors, zeroCategories));

% Test
mappedTestImages = mapDoMap(testX, theta, trainParams);

resolution = fullParams.resolution;
gSeenAccuracies = zeros(1, resolution);
gUnseenAccuracies = zeros(1, resolution);
gAccuracies = zeros(1, resolution);
numPerIteration = floor(length(sortedLogprobabilities) / (resolution-1));
logprobabilities = predictGaussianDiscriminant(mappedTestImages, outlierParams.mu, outlierParams.sigma, outlierParams.priors, zeroCategories);
cutoffs = [ arrayfun(@(x) sortedLogprobabilities((x-1)*numPerIteration+1), 1:resolution-1) sortedLogprobabilities(end) ];
for i = 1:resolution
cutoff = cutoffs(i);
% Test Gaussian classifier
fprintf('With cutoff %f:\n', cutoff);
results = mapGaussianThresholdDoEvaluate( testX, testY, zeroCategories, label_names, wordTable, ...
theta, trainParams, thetaSeen, trainParamsSeen, thetaUnseen, trainParamsUnseen, logprobabilities, cutoff, true);

gSeenAccuracies(i) = results.seenAccuracy;
gUnseenAccuracies(i) = results.unseenAccuracy;
gAccuracies(i) = results.accuracy;
end
gSeenAccuracies = fliplr(gSeenAccuracies);
gUnseenAccuracies = fliplr(gUnseenAccuracies);
gAccuracies = fliplr(gAccuracies);

% disp('Training Gaussian classifier using PDF');
% % Train Gaussian classifier
% mapped = mapDoMap(X, theta, trainParams);
% [mu, sigma, priors] = trainGaussianDiscriminant(mapped, Y, numCategories, wordTable);
% sortedLogprobabilities = sort(predictGaussianDiscriminantMin(mapped, mu, sigma, zeroCategories));
%
% % Test
% mappedTestImages = mapDoMap(testX, theta, trainParams);
%
% resolution = fullParams.resolution;
% pdfSeenAccuracies = zeros(1, resolution);
% pdfUnseenAccuracies = zeros(1, resolution);
% pdfAccuracies = zeros(1, resolution);
% numPerIteration = floor(length(sortedLogprobabilities) / (resolution-1));
% logprobabilities = predictGaussianDiscriminantMin(mappedTestImages, mu, sigma, zeroCategories);
% cutoffs = [ arrayfun(@(x) sortedLogprobabilities((x-1)*numPerIteration+1), 1:resolution-1) sortedLogprobabilities(end) ];
% for i = 1:resolution
% cutoff = cutoffs(i);
% % Test Gaussian classifier
% fprintf('With cutoff %f:\n', cutoff);
% results = mapGaussianThresholdDoEvaluate( testX, testY, zeroCategories, label_names, wordTable, ...
% theta, trainParams, thetaSeen, trainParamsSeen, thetaUnseen, trainParamsUnseen, logprobabilities, cutoff, true);
%
% pdfSeenAccuracies(i) = results.seenAccuracy;
% pdfUnseenAccuracies(i) = results.unseenAccuracy;
% pdfAccuracies(i) = results.accuracy;
% end
% pdfSeenAccuracies = fliplr(pdfSeenAccuracies);
% pdfUnseenAccuracies = fliplr(pdfUnseenAccuracies);
% pdfAccuracies = fliplr(pdfAccuracies);

disp('Training LoOP model');
resolution = fullParams.resolution - 1;
thresholds = 0:(1/resolution):1;
% lambdas = 1:13;
% knn = 20;
% loopSeenAccuracies = zeros(length(lambdas), length(thresholds));
% loopUnseenAccuracies = zeros(length(lambdas), length(thresholds));
% loopAccuracies = zeros(length(lambdas), length(thresholds));
% nonZeroCategoryIdPerm = randperm(length(nonZeroCategories));
% bestLambdas = repmat(lambdas(round(length(lambdas)/2)), 1, length(nonZeroCategories));
% mappedValidationImages = mapDoMap(Xvalidate, theta, trainParams);

% for k = 1:length(nonZeroCategories)
% changedCategory = nonZeroCategoryIdPerm(k);
% for i = 1:length(lambdas)
% tempLambdas = bestLambdas;
% tempLambdas(changedCategory) = lambdas(i);
% disp(tempLambdas);
% [ nplofAll, pdistAll ] = trainOutlierPriors(mapped, Y, nonZeroCategories, numTrainPerCat, knn, tempLambdas);
% probs = calcOutlierPriors( mappedValidationImages, mapped, Y, numTrainPerCat, nonZeroCategories, tempLambdas, knn, nplofAll, pdistAll );
% for t = 1:length(thresholds)
% fprintf('Threshold %f: ', thresholds(t));
% [~, results] = anomalyDoEvaluate(thetaSeen, ...
% trainParamsSeen, thetaUnseen, trainParamsUnseen, probs, Xvalidate, mappedValidationImages, Yvalidate, ...
% thresholds(t), zeroCategories, nonZeroCategories, wordTable, false);
% loopSeenAccuracies(i, t) = results.seenAccuracy;
% loopUnseenAccuracies(i, t) = results.unseenAccuracy;
% loopAccuracies(i, t) = results.accuracy;
% fprintf('seen accuracy: %f, unseen accuracy: %f\n', results.seenAccuracy, results.unseenAccuracy);
% end
% end
% [~, t] = max(sum(loopAccuracies,2));
% bestLambdas(changedCategory) = t;
% end
% disp('Best:');
% disp(bestLambdas);

% Do it again, with best lambdas
loopSeenAccuracies = zeros(1, length(thresholds));
loopUnseenAccuracies = zeros(1, length(thresholds));
loopAccuracies = zeros(1, length(thresholds));
% [ nplofAll, pdistAll ] = trainOutlierPriors(mapped, Y, nonZeroCategories, numTrainPerCat, knn, bestLambdas);

pp = struct('outlierOriginalSpace', false, 'topN', 3000);
[~, outlierParams] = doOutlierDetection('loop', XmapTrain, YmapTrain, XoutlierTrain, theta, trainParams, wordTable, pp, zeroCategories);
probs = calcOutlierPriors( mappedTestImages, mapped, YmapTrain, numTrainMapPerCat, nonZeroCategories, outlierParams.bestLambdas, outlierParams.knn, outlierParams.nplofAll, outlierParams.pdistAll );
for t = 1:length(thresholds)
fprintf('Threshold %f: ', thresholds(t));
[~, results] = anomalyDoEvaluate(thetaSeen, ...
trainParamsSeen, thetaUnseen, trainParamsUnseen, probs, testX, mappedTestImages, testY, ...
thresholds(t), zeroCategories, nonZeroCategories, wordTable, false);
loopSeenAccuracies(t) = results.seenAccuracy;
loopUnseenAccuracies(t) = results.unseenAccuracy;
loopAccuracies(t) = results.accuracy;
fprintf('accuracy: %f, seen accuracy: %f, unseen accuracy: %f\n', results.accuracy, results.seenAccuracy, results.unseenAccuracy);
end
% save(sprintf('%s/bestLambdas.mat', outputPath), 'bestLambdas');

disp('Run Bayesian pipeline');
[~, bayesianResult] = mapBayesianDoEvaluate(thetaSeen, thetaUnseen, ...
theta, trainParamsSeen, trainParamsUnseen, trainParams, mapped, YmapTrain, testX, ...
testY, bestLambdas, knn, nplofAll, pdistAll, numTrainMapPerCat, zeroCategories, nonZeroCategories, label_names, true);

save(sprintf('%s/out_%s.mat', outputPath, zeroStr), 'gSeenAccuracies', 'gUnseenAccuracies', 'gAccuracies', ...
'loopSeenAccuracies', 'loopUnseenAccuracies', 'loopAccuracies', 'pdfSeenAccuracies', 'pdfUnseenAccuracies', ...
'pdfAccuracies', 'bayesianResult');

0 comments on commit 0666d51

Please sign in to comment.