forked from yzhao062/pyod
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge remote-tracking branch 'origin/master'
- Loading branch information
Showing
20 changed files
with
596 additions
and
25 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
1. ECOD parallelization and interpretability | ||
2. Add latest deep learning algorithms. | ||
3. Finish the wrapping for the Cook's distance detector. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,5 @@ | ||
combo | ||
furo | ||
joblib | ||
keras | ||
matplotlib | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
"""Example of using Cook's distance (CD) for | ||
outlier detection | ||
""" | ||
# Author: D Kulik | ||
# License: BSD 2 clause | ||
|
||
from __future__ import division | ||
from __future__ import print_function | ||
|
||
import os | ||
import sys | ||
|
||
# temporary solution for relative imports in case pyod is not installed | ||
# if pyod is installed, no need to use the following line | ||
sys.path.append( | ||
os.path.abspath(os.path.join(os.path.dirname("__file__"), '..'))) | ||
|
||
import numpy as np | ||
from pyod.models.cd import CD | ||
from pyod.utils.data import generate_data | ||
from pyod.utils.data import evaluate_print | ||
from pyod.utils.example import visualize | ||
|
||
if __name__ == "__main__": | ||
contamination = 0.1 # percentage of outliers | ||
n_train = 200 # number of training points | ||
n_test = 100 # number of testing points | ||
|
||
# Generate sample data | ||
X_train, y_train, X_test, y_test = \ | ||
generate_data(n_train=n_train, | ||
n_test=n_test, | ||
n_features=2, | ||
contamination=contamination, | ||
random_state=42) | ||
|
||
# train CD detector | ||
clf_name = 'CD' | ||
clf = CD() | ||
clf.fit(X_train, y_train) | ||
|
||
# get the prediction labels and outlier scores of the training data | ||
y_train_pred = clf.labels_ # binary labels (0: inliers, 1: outliers) | ||
y_train_scores = clf.decision_scores_ # raw outlier scores | ||
|
||
# get the prediction on the test data | ||
y_test_pred = clf.predict(np.append(X_test, y_test.reshape(-1,1), axis=1)) # outlier labels (0 or 1) | ||
y_test_scores = clf.decision_function(np.append(X_test, y_test.reshape(-1,1), axis=1)) # outlier scores | ||
|
||
# evaluate and print the results | ||
print("\nOn Training Data:") | ||
evaluate_print(clf_name, y_train, y_train_scores) | ||
print("\nOn Test Data:") | ||
evaluate_print(clf_name, y_test, y_test_scores) | ||
|
||
# visualize the results | ||
visualize(clf_name, X_train, y_train, X_test, y_test, y_train_pred, | ||
y_test_pred, show_figure=True, save_figure=False) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,56 @@ | ||
# -*- coding: utf-8 -*- | ||
"""Example of using Copula Based Outlier Detector (COPOD) for outlier detection | ||
Sample wise interpretation is provided here. | ||
""" | ||
# Author: Winston Li <[email protected]> | ||
# License: BSD 2 clause | ||
|
||
from __future__ import division | ||
from __future__ import print_function | ||
|
||
import os | ||
import sys | ||
|
||
# temporary solution for relative imports in case pyod is not installed | ||
# if pyod is installed, no need to use the following line | ||
sys.path.append( | ||
os.path.abspath(os.path.join(os.path.dirname("__file__"), '..'))) | ||
|
||
from scipy.io import loadmat | ||
from sklearn.model_selection import train_test_split | ||
|
||
from pyod.models.copod import COPOD | ||
from pyod.utils.utility import standardizer | ||
|
||
if __name__ == "__main__": | ||
# Define data file and read X and y | ||
# NOTE: no fallback data is generated below; the .mat file is expected under ./data | ||
mat_file = 'cardio.mat' | ||
|
||
mat = loadmat(os.path.join('data', mat_file)) | ||
X = mat['X'] | ||
y = mat['y'].ravel() | ||
|
||
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, | ||
random_state=1) | ||
|
||
# standardizing data for processing | ||
X_train_norm, X_test_norm = standardizer(X_train, X_test) | ||
|
||
# train COPOD detector | ||
clf_name = 'COPOD' | ||
clf = COPOD() | ||
|
||
# you could try the parallel version as well. | ||
# clf = COPOD(n_jobs=2) | ||
clf.fit(X_train) | ||
|
||
# get the prediction labels and outlier scores of the training data | ||
y_train_pred = clf.labels_ # binary labels (0: inliers, 1: outliers) | ||
y_train_scores = clf.decision_scores_ # raw outlier scores | ||
|
||
print('The first sample is an outlier', y_train[0]) | ||
clf.explain_outlier(0) | ||
|
||
# we can see that features 7, 16, and 20 are above the 0.99 cutoff | ||
# and play a more important role in deciding that it is an outlier. |
Oops, something went wrong.