refactor combination methods by depending on combo instead

kranthi-nord · Dec 21, 2019 · ac32c31 · ac32c31
1 parent 20e8c23
commit ac32c31
Show file tree

Hide file tree

Showing 5 changed files with 18 additions and 138 deletions.
diff --git a/CHANGES.txt b/CHANGES.txt
@@ -88,7 +88,7 @@ v<0.7.5.1>, <12/05/2019> -- Hot fix for scikit-learn 0.22 update. To be complete
 v<0.7.5.1>, <12/05/2019> -- Disable CircleCI for Python 2.7.
 v<0.7.6>, <12/18/2019> -- Update Isolation Forest and LOF to be consistent with sklearn 0.22.
 v<0.7.6>, <12/18/2019> -- Add Deviation-based Outlier Detection (LMDD).
-
+v<0.7.7>, <12/21/2019> -- Refactor code for combination simplification on combo.
 
 
 

diff --git a/docs/requirements.txt b/docs/requirements.txt
@@ -1,3 +1,4 @@
+combo
 joblib
 keras
 matplotlib

diff --git a/pyod/models/combination.py b/pyod/models/combination.py
@@ -9,6 +9,13 @@
 
 import numpy as np
 from numpy.random import RandomState
+from combo.models.score_comb import aom as combo_aom
+from combo.models.score_comb import moa as combo_moa
+from combo.models.score_comb import average as combo_average
+from combo.models.score_comb import maximization as combo_maximization
+from combo.models.score_comb import majority_vote as combo_majority_vote
+from combo.models.score_comb import median as combo_median
+
 from sklearn.utils import check_array
 from sklearn.utils import column_or_1d
 # noinspection PyProtectedMember
@@ -18,118 +25,6 @@
 from ..utils.utility import check_parameter
 
 
-def _aom_moa_helper(mode, scores, n_buckets, method, bootstrap_estimators,
-                    random_state):
-    """Internal helper function for Average of Maximum (AOM) and
-    Maximum of Average (MOA). See :cite:`aggarwal2015theoretical` for details.
-
-    First dividing estimators into subgroups, take the maximum/average score
-    as the subgroup score. Finally, take the average/maximum of all subgroup
-    outlier scores.
-
-    Parameters
-    ----------
-    mode : str
-        Define the operation model, either "AOM" or "MOA".
-
-    scores : numpy array of shape (n_samples, n_estimators)
-        The score matrix outputted from various estimators.
-
-    n_buckets : int, optional (default=5)
-        The number of subgroups to build.
-
-    method : str, optional (default='static')
-        {'static', 'dynamic'}, if 'dynamic', build subgroups
-        randomly with dynamic bucket size.
-
-    bootstrap_estimators : bool, optional (default=False)
-        Whether estimators are drawn with replacement.
-
-    random_state : int, RandomState instance or None, optional (default=None)
-        If int, random_state is the seed used by the
-        random number generator; If RandomState instance, random_state is
-        the random number generator; If None, the random number generator
-        is the RandomState instance used by `np.random`.
-
-    Returns
-    -------
-    combined_scores : Numpy array of shape (n_samples,)
-        The combined outlier scores.
-
-    """
-
-    if mode != 'AOM' and mode != 'MOA':
-        raise NotImplementedError(
-            '{mode} is not implemented'.format(mode=mode))
-
-    scores = check_array(scores)
-    # TODO: add one more parameter for max number of estimators
-    # use random_state instead
-    # for now it is fixed at n_estimators/2
-    n_estimators = scores.shape[1]
-    check_parameter(n_buckets, 2, n_estimators, param_name='n_buckets')
-
-    scores_buckets = np.zeros([scores.shape[0], n_buckets])
-
-    if method == 'static':
-
-        n_estimators_per_bucket = int(n_estimators / n_buckets)
-        if n_estimators % n_buckets != 0:
-            raise ValueError('n_estimators / n_buckets has a remainder. Not '
-                             'allowed in static mode.')
-
-        if not bootstrap_estimators:
-            # shuffle the estimator order
-            shuffled_list = shuffle(list(range(0, n_estimators, 1)),
-                                    random_state=random_state)
-
-            head = 0
-            for i in range(0, n_estimators, n_estimators_per_bucket):
-                tail = i + n_estimators_per_bucket
-                batch_ind = int(i / n_estimators_per_bucket)
-                if mode == 'AOM':
-                    scores_buckets[:, batch_ind] = np.max(
-                        scores[:, shuffled_list[head:tail]], axis=1)
-                else:
-                    scores_buckets[:, batch_ind] = np.mean(
-                        scores[:, shuffled_list[head:tail]], axis=1)
-
-                # increment index
-                head = head + n_estimators_per_bucket
-                # noinspection PyUnusedLocal
-        else:
-            for i in range(n_buckets):
-                ind = sample_without_replacement(n_estimators,
-                                                 n_estimators_per_bucket,
-                                                 random_state=random_state)
-                if mode == 'AOM':
-                    scores_buckets[:, i] = np.max(scores[:, ind], axis=1)
-                else:
-                    scores_buckets[:, i] = np.mean(scores[:, ind], axis=1)
-
-    elif method == 'dynamic':  # random bucket size
-        for i in range(n_buckets):
-            # the number of estimators in a bucket should be 2 - n/2
-            max_estimator_per_bucket = RandomState(seed=random_state).randint(
-                2, int(n_estimators / 2))
-            ind = sample_without_replacement(n_estimators,
-                                             max_estimator_per_bucket,
-                                             random_state=random_state)
-            if mode == 'AOM':
-                scores_buckets[:, i] = np.max(scores[:, ind], axis=1)
-            else:
-                scores_buckets[:, i] = np.mean(scores[:, ind], axis=1)
-
-    else:
-        raise NotImplementedError(
-            '{method} is not implemented'.format(method=method))
-
-    if mode == 'AOM':
-        return np.mean(scores_buckets, axis=1)
-    else:
-        return np.max(scores_buckets, axis=1)
-
-
 def aom(scores, n_buckets=5, method='static', bootstrap_estimators=False,
         random_state=None):
     """Average of Maximum - An ensemble method for combining multiple
@@ -165,8 +60,9 @@ def aom(scores, n_buckets=5, method='static', bootstrap_estimators=False,
         The combined outlier scores.
 
     """
-    return _aom_moa_helper('AOM', scores, n_buckets, method,
-                           bootstrap_estimators, random_state)
+
+    return combo_aom(scores, n_buckets, method, bootstrap_estimators,
+                     random_state)
 
 
 def moa(scores, n_buckets=5, method='static', bootstrap_estimators=False,
@@ -205,8 +101,8 @@ def moa(scores, n_buckets=5, method='static', bootstrap_estimators=False,
         The combined outlier scores.
 
     """
-    return _aom_moa_helper('MOA', scores, n_buckets, method,
-                           bootstrap_estimators, random_state)
+    return combo_moa(scores, n_buckets, method, bootstrap_estimators,
+                     random_state)
 
 
 def average(scores, estimator_weights=None):
@@ -227,24 +123,7 @@ def average(scores, estimator_weights=None):
         The combined outlier scores.
 
     """
-    scores = check_array(scores)
-
-    if estimator_weights is not None:
-        if estimator_weights.shape != (1, scores.shape[1]):
-            raise ValueError(
-                'Bad input shape of estimator_weight: (1, {score_shape}),'
-                'and {estimator_weights} received'.format(
-                    score_shape=scores.shape[1],
-                    estimator_weights=estimator_weights.shape))
-
-        # (d1*w1 + d2*w2 + ...+ dn*wn)/(w1+w2+...+wn)
-        # generated weighted scores
-        scores = np.sum(np.multiply(scores, estimator_weights),
-                        axis=1) / np.sum(estimator_weights)
-        return scores.ravel()
-
-    else:
-        return np.mean(scores, axis=1).ravel()
+    return combo_average(scores, estimator_weights)
 
 
 def maximization(scores):
@@ -262,6 +141,4 @@ def maximization(scores):
         The combined outlier scores.
 
     """
-
-    scores = check_array(scores)
-    return np.max(scores, axis=1).ravel()
+    return combo_maximization(scores)
diff --git a/requirements.txt b/requirements.txt
@@ -1,3 +1,4 @@
+combo
 joblib
 matplotlib
 numpy>=1.13

diff --git a/requirements_ci.txt b/requirements_ci.txt
@@ -1,3 +1,4 @@
+combo
 joblib
 keras
 matplotlib
-Original file line number
+Diff line change
@@ -1,3 +1,4 @@
+    combo
     joblib
     keras
     matplotlib
@@ Expand Down @@