forked from yzhao062/pyod
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add cluster-based local outlier factor (CBLOF)
- Loading branch information
1 parent
189f6c0
commit 1ba7e6f
Showing
9 changed files
with
376 additions
and
172 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,145 @@ | ||
# -*- coding: utf-8 -*- | ||
"""Example of using Cluster-based Local Outlier Factor (CBLOF) for outlier | ||
detection | ||
""" | ||
# Author: Yue Zhao <[email protected]> | ||
# License: BSD 2 clause | ||
|
||
from __future__ import division | ||
from __future__ import print_function | ||
|
||
import os | ||
import sys | ||
|
||
# temporary solution for relative imports in case pyod is not installed | ||
# if pyod is installed, no need to use the following line | ||
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) | ||
|
||
from sklearn.utils import check_X_y | ||
import matplotlib.pyplot as plt | ||
from matplotlib.lines import Line2D | ||
|
||
from pyod.models.cblof import CBLOF | ||
from pyod.utils.data import generate_data | ||
from pyod.utils.data import get_color_codes | ||
from pyod.utils.data import evaluate_print | ||
|
||
|
||
def visualize(clf_name, X_train, y_train, X_test, y_test, y_train_pred, | ||
y_test_pred, show_figure=True, | ||
save_figure=False): # pragma: no cover | ||
""" | ||
Utility function for visualizing the results in examples | ||
Internal use only | ||
:param clf_name: The name of the detector | ||
:type clf_name: str | ||
:param X_train: The training samples | ||
:param X_train: numpy array of shape (n_samples, n_features) | ||
:param y_train: The ground truth of training samples | ||
:type y_train: list or array of shape (n_samples,) | ||
:param X_test: The test samples | ||
:type X_test: numpy array of shape (n_samples, n_features) | ||
:param y_test: The ground truth of test samples | ||
:type y_test: list or array of shape (n_samples,) | ||
:param y_train_pred: The predicted outlier scores on the training samples | ||
:type y_train_pred: numpy array of shape (n_samples, n_features) | ||
:param y_test_pred: The predicted outlier scores on the test samples | ||
:type y_test_pred: numpy array of shape (n_samples, n_features) | ||
:param show_figure: If set to True, show the figure | ||
:type show_figure: bool, optional (default=True) | ||
:param save_figure: If set to True, save the figure to the local | ||
:type save_figure: bool, optional (default=False) | ||
""" | ||
|
||
if X_train.shape[1] != 2 or X_test.shape[1] != 2: | ||
raise ValueError("Input data has to be 2-d for visualization. The " | ||
"input data has {shape}.".format(shape=X_train.shape)) | ||
|
||
X_train, y_train = check_X_y(X_train, y_train) | ||
X_test, y_test = check_X_y(X_test, y_test) | ||
c_train = get_color_codes(y_train) | ||
c_test = get_color_codes(y_test) | ||
|
||
fig = plt.figure(figsize=(12, 10)) | ||
plt.suptitle("Demo of {clf_name}".format(clf_name=clf_name)) | ||
|
||
fig.add_subplot(221) | ||
plt.scatter(X_train[:, 0], X_train[:, 1], c=c_train) | ||
plt.title('Train ground truth') | ||
legend_elements = [Line2D([0], [0], marker='o', color='w', label='normal', | ||
markerfacecolor='b', markersize=8), | ||
Line2D([0], [0], marker='o', color='w', label='outlier', | ||
markerfacecolor='r', markersize=8)] | ||
|
||
plt.legend(handles=legend_elements, loc=4) | ||
|
||
fig.add_subplot(222) | ||
plt.scatter(X_test[:, 0], X_test[:, 1], c=c_test) | ||
plt.title('Test ground truth') | ||
plt.legend(handles=legend_elements, loc=4) | ||
|
||
fig.add_subplot(223) | ||
plt.scatter(X_train[:, 0], X_train[:, 1], c=y_train_pred) | ||
plt.title('Train prediction by {clf_name}'.format(clf_name=clf_name)) | ||
legend_elements = [Line2D([0], [0], marker='o', color='w', label='normal', | ||
markerfacecolor='0', markersize=8), | ||
Line2D([0], [0], marker='o', color='w', label='outlier', | ||
markerfacecolor='yellow', markersize=8)] | ||
plt.legend(handles=legend_elements, loc=4) | ||
|
||
fig.add_subplot(224) | ||
plt.scatter(X_test[:, 0], X_test[:, 1], c=y_test_pred) | ||
plt.title('Test prediction by {clf_name}'.format(clf_name=clf_name)) | ||
plt.legend(handles=legend_elements, loc=4) | ||
|
||
if save_figure: | ||
plt.savefig('{clf_name}.png'.format(clf_name=clf_name), dpi=300) | ||
if show_figure: | ||
plt.show() | ||
return | ||
|
||
|
||
if __name__ == "__main__": | ||
contamination = 0.1 # percentage of outliers | ||
n_train = 200 # number of training points | ||
n_test = 100 # number of testing points | ||
|
||
# Generate sample data | ||
X_train, y_train, X_test, y_test = \ | ||
generate_data(n_train=n_train, | ||
n_test=n_test, | ||
n_features=2, | ||
contamination=contamination, | ||
random_state=42) | ||
|
||
# train CBLOF detector | ||
clf_name = 'CBLOF' | ||
clf = CBLOF() | ||
clf.fit(X_train) | ||
|
||
# get the prediction labels and outlier scores of the training data | ||
y_train_pred = clf.labels_ # binary labels (0: inliers, 1: outliers) | ||
y_train_scores = clf.decision_scores_ # raw outlier scores | ||
|
||
# get the prediction on the test data | ||
y_test_pred = clf.predict(X_test) # outlier labels (0 or 1) | ||
y_test_scores = clf.decision_function(X_test) # outlier scores | ||
|
||
# evaluate and print the results | ||
print("\nOn Training Data:") | ||
evaluate_print(clf_name, y_train, y_train_scores) | ||
print("\nOn Test Data:") | ||
evaluate_print(clf_name, y_test, y_test_scores) | ||
|
||
# visualize the results | ||
visualize(clf_name, X_train, y_train, X_test, y_test, y_train_pred, | ||
y_test_pred, show_figure=True, save_figure=False) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,5 @@ | ||
# -*- coding: utf-8 -*- | ||
__version__ = '0.5.3' | ||
__version__ = '0.5.4' | ||
|
||
from . import models | ||
from . import utils | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.