Revert "ENH add a parameter pos_label in roc_auc_score (scikit-learn#…
Browse files Browse the repository at this point in the history
…17594)" (scikit-learn#17703)

This reverts commit fde9212.
ogrisel authored Jun 24, 2020
1 parent 7cc0177 commit 59249d7
Showing 4 changed files with 8 additions and 72 deletions.
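
Note: after this revert, roc_auc_score no longer accepts pos_label, while roc_curve keeps it. A minimal sketch of the equivalent post-revert computation via roc_curve plus auc; the toy labels and scores below are illustrative, not taken from this commit:

    import numpy as np
    from sklearn.metrics import auc, roc_curve

    # Toy data: scores oriented toward the "cancer" class (illustrative).
    y_true = np.array(["cancer", "not cancer", "cancer", "not cancer"])
    y_score = np.array([0.9, 0.2, 0.6, 0.4])

    # Pre-revert: roc_auc_score(y_true, y_score, pos_label="cancer")
    # Post-revert: build the curve with pos_label, then integrate it.
    fpr, tpr, _ = roc_curve(y_true, y_score, pos_label="cancer")
    print(auc(fpr, tpr))  # 1.0: every "cancer" score outranks the rest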
doc/whats_new/v0.24.rst: 3 changes (0 additions & 3 deletions)
@@ -122,9 +122,6 @@ Changelog
   class to be used when computing the precision and recall statistics.
   :pr:`17569` by :user:`Guillaume Lemaitre <glemaitre>`.
 
-- |Enhancement| Add `pos_label` parameter to :func:`roc_auc_score`.
-  :pr:`17594` by :user:`Guillaume Lemaitre <glemaitre>`.
-
 :mod:`sklearn.model_selection`
 ..............................
 
sklearn/metrics/_ranking.py: 23 changes (7 additions & 16 deletions)
@@ -218,16 +218,14 @@ def _binary_uninterpolated_average_precision(
                                  average, sample_weight=sample_weight)
 
 
-def _binary_roc_auc_score(y_true, y_score, sample_weight=None, max_fpr=None,
-                          pos_label=None):
+def _binary_roc_auc_score(y_true, y_score, sample_weight=None, max_fpr=None):
     """Binary roc auc score"""
     if len(np.unique(y_true)) != 2:
         raise ValueError("Only one class present in y_true. ROC AUC score "
                          "is not defined in that case.")
 
-    fpr, tpr, _ = roc_curve(
-        y_true, y_score, sample_weight=sample_weight, pos_label=pos_label,
-    )
+    fpr, tpr, _ = roc_curve(y_true, y_score,
+                            sample_weight=sample_weight)
     if max_fpr is None or max_fpr == 1:
         return auc(fpr, tpr)
     if max_fpr <= 0 or max_fpr > 1:
@@ -250,8 +248,7 @@ def _binary_roc_auc_score(y_true, y_score, sample_weight=None, max_fpr=None,
 
 
 @_deprecate_positional_args
 def roc_auc_score(y_true, y_score, *, average="macro", sample_weight=None,
-                  max_fpr=None, multi_class="raise", labels=None,
-                  pos_label=None):
+                  max_fpr=None, multi_class="raise", labels=None):
     """Compute Area Under the Receiver Operating Characteristic Curve (ROC AUC)
     from prediction scores.
@@ -330,13 +327,6 @@ def roc_auc_score(y_true, y_score, *, average="macro", sample_weight=None,
         If ``None``, the numerical or lexicographical order of the labels in
         ``y_true`` is used.
 
-    pos_label : int or str, default=None
-        The label of the positive class in the binary case. When
-        `pos_label=None`, if `y_true` is in {-1, 1} or {0, 1}, `pos_label` is
-        set to 1, otherwise an error will be raised.
-
-        .. versionadded:: 0.24
-
     Returns
     -------
     auc : float
@@ -398,9 +388,10 @@ def roc_auc_score(y_true, y_score, *, average="macro", sample_weight=None,
         return _multiclass_roc_auc_score(y_true, y_score, labels,
                                          multi_class, average, sample_weight)
     elif y_type == "binary":
+        labels = np.unique(y_true)
+        y_true = label_binarize(y_true, classes=labels)[:, 0]
         return _average_binary_score(partial(_binary_roc_auc_score,
-                                             max_fpr=max_fpr,
-                                             pos_label=pos_label),
+                                             max_fpr=max_fpr),
                                      y_true, y_score, average,
                                      sample_weight=sample_weight)
     else:  # multilabel-indicator
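
Note: the two lines restored in the binary branch of roc_auc_score above (np.unique plus label_binarize) mean the positive class is now inferred rather than chosen by the caller. A small sketch of that inference, relying on label_binarize marking the second of the two sorted classes in its single output column:

    import numpy as np
    from sklearn.preprocessing import label_binarize

    y_true = np.array(["cancer", "not cancer", "cancer", "not cancer"])
    labels = np.unique(y_true)  # sorted: ['cancer', 'not cancer']
    # The returned column marks labels[1], so "not cancer" becomes the
    # implicit positive class for the binary ROC AUC computation.
    y_bin = label_binarize(y_true, classes=labels)[:, 0]
    print(y_bin)  # [0 1 0 1]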
sklearn/metrics/tests/test_common.py: 11 changes (0 additions & 11 deletions)
@@ -319,17 +319,6 @@ def precision_recall_curve_padded_thresholds(*args, **kwargs):
 # Metrics with a "pos_label" argument
 METRICS_WITH_POS_LABEL = {
     "roc_curve",
-
-    "roc_auc_score",
-    "weighted_roc_auc",
-    "samples_roc_auc",
-    "micro_roc_auc",
-    "ovr_roc_auc",
-    "weighted_ovr_roc_auc",
-    "ovo_roc_auc",
-    "weighted_ovo_roc_auc",
-    "partial_roc_auc",
-
     "precision_recall_curve",
 
     "brier_score_loss",
sklearn/metrics/tests/test_ranking.py: 43 changes (1 addition & 42 deletions)
@@ -7,13 +7,9 @@
 from sklearn import datasets
 from sklearn import svm
 
+from sklearn.utils.extmath import softmax
 from sklearn.datasets import make_multilabel_classification
-from sklearn.datasets import load_breast_cancer
-from sklearn.linear_model import LogisticRegression
-from sklearn.model_selection import train_test_split
 from sklearn.random_projection import _sparse_random_matrix
-from sklearn.utils import shuffle
-from sklearn.utils.extmath import softmax
 from sklearn.utils.validation import check_array, check_consistent_length
 from sklearn.utils.validation import check_random_state
 
@@ -1473,40 +1469,3 @@ def test_partial_roc_auc_score():
     assert_almost_equal(
         roc_auc_score(y_true, y_pred, max_fpr=max_fpr),
         _partial_roc_auc_score(y_true, y_pred, max_fpr))
-
-
-@pytest.mark.parametrize(
-    "decision_method", ["predict_proba", "decision_function"]
-)
-def test_roc_auc_score_pos_label(decision_method):
-    X, y = load_breast_cancer(return_X_y=True)
-    # create an highly imbalanced
-    idx_positive = np.flatnonzero(y == 1)
-    idx_negative = np.flatnonzero(y == 0)
-    idx_selected = np.hstack([idx_negative, idx_positive[:25]])
-    X, y = X[idx_selected], y[idx_selected]
-    X, y = shuffle(X, y, random_state=42)
-    # only use 2 features to make the problem even harder
-    X = X[:, :2]
-    y = np.array(
-        ["cancer" if c == 1 else "not cancer" for c in y], dtype=object
-    )
-    X_train, X_test, y_train, y_test = train_test_split(
-        X, y, stratify=y, random_state=0,
-    )
-
-    classifier = LogisticRegression()
-    classifier.fit(X_train, y_train)
-
-    # sanity check to be sure the positive class is classes_[0] and that we
-    # are betrayed by the class imbalance
-    assert classifier.classes_.tolist() == ["cancer", "not cancer"]
-    pos_label = "cancer"
-
-    y_pred = getattr(classifier, decision_method)(X_test)
-    y_pred = y_pred[:, 0] if y_pred.ndim == 2 else -y_pred
-
-    fpr, tpr, _ = roc_curve(y_test, y_pred, pos_label=pos_label)
-    roc_auc = roc_auc_score(y_test, y_pred, pos_label=pos_label)
-
-    assert roc_auc == pytest.approx(np.trapz(tpr, fpr))
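
Note: the deleted test above checked roc_auc_score(..., pos_label=...) against np.trapz over the roc_curve output. After the revert the same quantity remains reachable without pos_label on roc_auc_score: orient the scores toward the desired class and integrate the curve. A sketch reusing the test's breast-cancer setup; the estimator settings are illustrative:

    import numpy as np
    from sklearn.datasets import load_breast_cancer
    from sklearn.linear_model import LogisticRegression
    from sklearn.metrics import auc, roc_curve
    from sklearn.model_selection import train_test_split

    X, y = load_breast_cancer(return_X_y=True)
    y = np.array(["cancer" if c == 1 else "not cancer" for c in y], dtype=object)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, stratify=y, random_state=0)

    clf = LogisticRegression(max_iter=10_000).fit(X_train, y_train)

    # predict_proba columns follow clf.classes_ (sorted), so column 0
    # holds the probability of "cancer" here.
    pos_label = "cancer"
    y_score = clf.predict_proba(X_test)[:, 0]

    fpr, tpr, _ = roc_curve(y_test, y_score, pos_label=pos_label)
    print(auc(fpr, tpr))  # equal to np.trapz(tpr, fpr)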
