Skip to content

Commit

Permalink
refactor combination methods by depending on combo instead
Browse files Browse the repository at this point in the history
  • Loading branch information
yzhao062 committed Dec 21, 2019
1 parent 20e8c23 commit ac32c31
Show file tree
Hide file tree
Showing 5 changed files with 18 additions and 138 deletions.
2 changes: 1 addition & 1 deletion CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ v<0.7.5.1>, <12/05/2019> -- Hot fix for scikit-learn 0.22 update. To be complete
v<0.7.5.1>, <12/05/2019> -- Disable CircleCI for Python 2.7.
v<0.7.6>, <12/18/2019> -- Update Isolation Forest and LOF to be consistent with sklearn 0.22.
v<0.7.6>, <12/18/2019> -- Add Deviation-based Outlier Detection (LMDD).

v<0.7.7>, <12/21/2019> -- Refactor code for combination simplification on combo.



Expand Down
1 change: 1 addition & 0 deletions docs/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
combo
joblib
keras
matplotlib
Expand Down
151 changes: 14 additions & 137 deletions pyod/models/combination.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,13 @@

import numpy as np
from numpy.random import RandomState
from combo.models.score_comb import aom as combo_aom
from combo.models.score_comb import moa as combo_moa
from combo.models.score_comb import average as combo_average
from combo.models.score_comb import maximization as combo_maximization
from combo.models.score_comb import majority_vote as combo_majority_vote
from combo.models.score_comb import median as combo_median

from sklearn.utils import check_array
from sklearn.utils import column_or_1d
# noinspection PyProtectedMember
Expand All @@ -18,118 +25,6 @@
from ..utils.utility import check_parameter


def _aom_moa_helper(mode, scores, n_buckets, method, bootstrap_estimators,
                    random_state):
    """Internal helper function for Average of Maximum (AOM) and
    Maximum of Average (MOA). See :cite:`aggarwal2015theoretical` for details.

    First dividing estimators into subgroups, take the maximum/average score
    as the subgroup score. Finally, take the average/maximum of all subgroup
    outlier scores.

    Parameters
    ----------
    mode : str
        Define the operation model, either "AOM" or "MOA".

    scores : numpy array of shape (n_samples, n_estimators)
        The score matrix outputted from various estimators.

    n_buckets : int
        The number of subgroups to build.

    method : str
        {'static', 'dynamic'}. If 'static', every subgroup holds
        ``n_estimators / n_buckets`` estimators. If 'dynamic', build
        subgroups randomly with dynamic bucket size.

    bootstrap_estimators : bool
        Only used when ``method='static'``. If False, the shuffled estimators
        are partitioned so each one is used exactly once. If True, every
        subgroup is sampled independently, so an estimator may appear in
        several subgroups.

    random_state : int, RandomState instance or None
        If int, random_state is the seed used by the
        random number generator; If RandomState instance, random_state is
        the random number generator; If None, the random number generator
        is the RandomState instance used by `np.random`.

    Returns
    -------
    combined_scores : Numpy array of shape (n_samples,)
        The combined outlier scores.

    Raises
    ------
    NotImplementedError
        If ``mode`` is neither 'AOM' nor 'MOA', or ``method`` is neither
        'static' nor 'dynamic'.

    ValueError
        If ``method='static'`` and ``n_estimators`` is not divisible by
        ``n_buckets``.
    """
    # Local import keeps this block self-contained; sklearn is already a
    # dependency of this module.
    from sklearn.utils import check_random_state

    if mode != 'AOM' and mode != 'MOA':
        raise NotImplementedError(
            '{mode} is not implemented'.format(mode=mode))

    scores = check_array(scores)
    # TODO: add one more parameter for max number of estimators
    # for now it is fixed at n_estimators/2
    n_estimators = scores.shape[1]
    check_parameter(n_buckets, 2, n_estimators, param_name='n_buckets')

    if method != 'static' and method != 'dynamic':
        raise NotImplementedError(
            '{method} is not implemented'.format(method=method))

    # AOM: max inside each bucket, mean across buckets; MOA is the reverse.
    if mode == 'AOM':
        within_bucket, across_buckets = np.max, np.mean
    else:
        within_bucket, across_buckets = np.mean, np.max

    # Resolve the seed into ONE generator shared by all draws below.
    # Passing the raw integer seed to every call inside the loops would
    # restart the stream each time and make all buckets identical. This also
    # makes RandomState instances work, as the docstring promises.
    rng = check_random_state(random_state)

    scores_buckets = np.zeros([scores.shape[0], n_buckets])

    if method == 'static':
        if n_estimators % n_buckets != 0:
            raise ValueError('n_estimators / n_buckets has a remainder. Not '
                             'allowed in static mode.')
        n_estimators_per_bucket = n_estimators // n_buckets

        if not bootstrap_estimators:
            # shuffle the estimator order, then cut it into equal slices
            shuffled_list = shuffle(list(range(n_estimators)),
                                    random_state=rng)
            for i in range(n_buckets):
                head = i * n_estimators_per_bucket
                tail = head + n_estimators_per_bucket
                scores_buckets[:, i] = within_bucket(
                    scores[:, shuffled_list[head:tail]], axis=1)
        else:
            # each bucket is an independent draw of distinct estimators
            for i in range(n_buckets):
                ind = sample_without_replacement(n_estimators,
                                                 n_estimators_per_bucket,
                                                 random_state=rng)
                scores_buckets[:, i] = within_bucket(scores[:, ind], axis=1)

    else:  # method == 'dynamic': random bucket size
        # The number of estimators in a bucket should be 2 - n/2. randint's
        # upper bound is exclusive, hence the +1; the max() keeps the range
        # non-empty for very small ensembles.
        largest_bucket = max(n_estimators // 2, 2)
        for i in range(n_buckets):
            bucket_size = rng.randint(2, largest_bucket + 1)
            ind = sample_without_replacement(n_estimators,
                                             bucket_size,
                                             random_state=rng)
            scores_buckets[:, i] = within_bucket(scores[:, ind], axis=1)

    return across_buckets(scores_buckets, axis=1)


def aom(scores, n_buckets=5, method='static', bootstrap_estimators=False,
random_state=None):
"""Average of Maximum - An ensemble method for combining multiple
Expand Down Expand Up @@ -165,8 +60,9 @@ def aom(scores, n_buckets=5, method='static', bootstrap_estimators=False,
The combined outlier scores.
"""
return _aom_moa_helper('AOM', scores, n_buckets, method,
bootstrap_estimators, random_state)

return combo_aom(scores, n_buckets, method, bootstrap_estimators,
random_state)


def moa(scores, n_buckets=5, method='static', bootstrap_estimators=False,
Expand Down Expand Up @@ -205,8 +101,8 @@ def moa(scores, n_buckets=5, method='static', bootstrap_estimators=False,
The combined outlier scores.
"""
return _aom_moa_helper('MOA', scores, n_buckets, method,
bootstrap_estimators, random_state)
return combo_moa(scores, n_buckets, method, bootstrap_estimators,
random_state)


def average(scores, estimator_weights=None):
Expand All @@ -227,24 +123,7 @@ def average(scores, estimator_weights=None):
The combined outlier scores.
"""
scores = check_array(scores)

if estimator_weights is not None:
if estimator_weights.shape != (1, scores.shape[1]):
raise ValueError(
'Bad input shape of estimator_weight: (1, {score_shape}),'
'and {estimator_weights} received'.format(
score_shape=scores.shape[1],
estimator_weights=estimator_weights.shape))

# (d1*w1 + d2*w2 + ...+ dn*wn)/(w1+w2+...+wn)
# generated weighted scores
scores = np.sum(np.multiply(scores, estimator_weights),
axis=1) / np.sum(estimator_weights)
return scores.ravel()

else:
return np.mean(scores, axis=1).ravel()
return combo_average(scores, estimator_weights)


def maximization(scores):
Expand All @@ -262,6 +141,4 @@ def maximization(scores):
The combined outlier scores.
"""

scores = check_array(scores)
return np.max(scores, axis=1).ravel()
return combo_maximization(scores)
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
combo
joblib
matplotlib
numpy>=1.13
Expand Down
1 change: 1 addition & 0 deletions requirements_ci.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
combo
joblib
keras
matplotlib
Expand Down

0 comments on commit ac32c31

Please sign in to comment.