add tests

valteresj2 · Apr 9, 2020 · 479e23f · 479e23f
1 parent e61c7d6
commit 479e23f
Show file tree

Hide file tree

Showing 5 changed files with 117 additions and 49 deletions.
diff --git a/examples/quadrativ_effect_on_1mio_rows.py b/examples/quadrativ_effect_on_1mio_rows.py
@@ -15,7 +15,7 @@ def heatmap(df):
 df = pd.DataFrame()
 df["x"] = np.random.uniform(-2, 2, 1_000_000)
 df["error"] = np.random.uniform(-0.5, 0.5, 1_000_000)
-df["y"] = df["x"]*df["x"] + df["error"]
+df["y"] = df["x"] * df["x"] + df["error"]
 
 # %%
 sns.scatterplot(x="x", y="y", data=df.sample(10_000))

diff --git a/src/ppscore/__init__.py b/src/ppscore/__init__.py
@@ -5,7 +5,7 @@
     # Change here if project is renamed and does not equal the package name
     dist_name = __name__
     __version__ = get_distribution(dist_name).version
-except DistributionNotFound:
+except DistributionNotFound:  # pragma: no cover
     __version__ = "unknown"
 finally:
     del get_distribution, DistributionNotFound

diff --git a/src/ppscore/calculation.py b/src/ppscore/calculation.py
@@ -1,4 +1,3 @@
-# %%
 from sklearn import tree
 from sklearn import preprocessing
 from sklearn.model_selection import cross_val_score
@@ -30,7 +29,6 @@
 NUMERIC_AS_CATEGORIC_BREAKPOINT = 15
 
 
-# %%
 # https://scikit-learn.org/stable/modules/tree.html
 
 # https://scikit-learn.org/stable/modules/cross_validation.html
@@ -60,29 +58,24 @@ def _calculate_model_cv_score_(df, target, feature, metric, model, **kwargs):
         # reshaping needed because there is only 1 feature
         feature_df = df[feature].values.reshape(-1, 1)
 
+    # Cross-validation uses StratifiedKFold for classification, KFold for regression
     scores = cross_val_score(
         model, feature_df, target_series, cv=CV_ITERATIONS, scoring=metric
     )
 
-    # Crossvalidation is stratifiedKFold for classification, KFold for regression
     return scores.mean()
 
 
-# %%
-
-# %%
 def _normalized_mae_score(model_mae, naive_mae):
     # 10, 5 >> 0 because worse than naive
     # 10, 20 >> 0.5
     # 5, 20 >> 0.75 = 1 - mae/base_mae
     if model_mae > naive_mae:
-        # maybe add warning?
         return 0
     else:
         return 1 - (model_mae / naive_mae)
 
 
-# %%
 def _mae_normalizer(df, y, model_score):
     df["naive"] = df[y].mean()
     baseline_score = mean_absolute_error(df[y], df["naive"])  # true, pred
@@ -91,7 +84,6 @@ def _mae_normalizer(df, y, model_score):
     return ppscore, baseline_score
 
 
-# %%
 def _normalized_f1_score(model_f1, baseline_f1):
     ## F1 ranges from 0 to 1
     ## 1 is best
@@ -106,7 +98,6 @@ def _normalized_f1_score(model_f1, baseline_f1):
         return f1_diff / scale_range  # 0.1/0.3 = 0.33
 
 
-# %%
 def _f1_normalizer(df, y, model_score):
     df["naive"] = df[y].value_counts().index[0]
     baseline_score = f1_score(df[y], df["naive"], average="weighted")
@@ -115,7 +106,6 @@ def _f1_normalizer(df, y, model_score):
     return ppscore, baseline_score
 
 
-# %%
 TASKS = {
     "regression": {
         "metric_name": "mean absolute error",
@@ -150,14 +140,11 @@ def _f1_normalizer(df, y, model_score):
 }
 
 
-# %%
 def _infer_task(df, x, y):
     if x == y:
         return "predict_itself"
 
     category_count = df[y].value_counts().count()
-    if category_count == 0:
-        raise Exception(f"The target column {y} does not have valid values.")
     if category_count == 1:
         return "predict_constant"
     if category_count == 2:
@@ -183,10 +170,9 @@ def _infer_task(df, x, y):
 
     raise Exception(
         f"Could not infer a valid task based on the target {y}. The dtype {df[y].dtype} is not yet supported"
-    )
+    )  # pragma: no cover
 
 
-# %%
 def _feature_is_id(df, x):
     if not (is_string_dtype(df[x]) or is_categorical_dtype(df[x])):
         return False
@@ -195,7 +181,6 @@ def _feature_is_id(df, x):
     return category_count == len(df[x])
 
 
-# %%
 def _maybe_sample(df, sample):
     if sample and len(df) > sample:
         # this is a problem if x or y have more than sample=5000 categories
@@ -204,24 +189,25 @@ def _maybe_sample(df, sample):
     return df
 
 
-# %%
 def score(df, x, y, task=None, sample=5000):
     # TODO: log.warning when values have been dropped
     df = df[[x, y]].dropna()
+    if len(df) == 0:
+        raise Exception("After dropping missing values, there are no valid rows left")
     df = _maybe_sample(df, sample)
 
-    if task is not None:
-        task_name = task
-    else:
+    if task is None:
         task_name = _infer_task(df, x, y)
+    else:
+        task_name = task
 
     task = TASKS[task_name]
 
     if task_name in ["predict_constant", "predict_itself"]:
         model_score = 1
         ppscore = 1
         baseline_score = 1
-    elif task_name == "predict_id":
+    elif task_name == "predict_id":  # target is id
         model_score = 0
         ppscore = 0
         baseline_score = 0
@@ -247,11 +233,10 @@ def score(df, x, y, task=None, sample=5000):
     }
 
 
-# %%
 # def predictors(df, y, task=None, sorted=True):
 #    pass
 
-# %%
+
 def matrix(df, output="df", **kwargs):
     data = {}
     columns = list(df.columns)
@@ -272,11 +257,5 @@ def matrix(df, output="df", **kwargs):
         matrix = pd.DataFrame.from_dict(data, orient="index")
         matrix.columns = columns
         return matrix
-    else:
+    else:  # output == "dict"
         return data
-
-
-# %%
-# matrix(df)
-
-# %%
diff --git a/tests/test_calculation.py b/tests/test_calculation.py
@@ -0,0 +1,105 @@
+# -*- coding: utf-8 -*-
+
+import pytest
+import pandas as pd
+import numpy as np
+
+import ppscore as pps
+
+
+def test__normalized_f1_score():
+    from ppscore.calculation import _normalized_f1_score
+
+    assert _normalized_f1_score(0.4, 0.5) == 0
+    assert _normalized_f1_score(0.75, 0.5) == 0.5
+
+
+def test__normalized_mae_score():
+    from ppscore.calculation import _normalized_mae_score
+
+    assert _normalized_mae_score(10, 5) == 0
+    assert _normalized_mae_score(5, 10) == 0.5
+
+
+def test__infer_task():
+    # each check is in the same order as in the original implementation
+    from ppscore.calculation import _infer_task
+
+    df = pd.read_csv("examples/titanic.csv")
+
+    assert _infer_task(df, "Age", "Age") == "predict_itself"
+
+    df["constant"] = 1
+    assert _infer_task(df, "Age", "constant") == "predict_constant"
+
+    assert _infer_task(df, "Age", "Survived") == "classification"
+
+    df = df.reset_index()
+    df["id"] = df["index"].astype(str)
+    assert _infer_task(df, "Age", "id") == "predict_id"
+
+    # classification because the target is numeric but has few distinct values
+    assert _infer_task(df, "Age", "SibSp") == "classification"
+
+    df["Pclass_category"] = df["Pclass"].astype("category")
+    assert _infer_task(df, "Age", "Pclass_category") == "classification"
+
+    df["Pclass_datetime"] = pd.to_datetime(df["Pclass"], infer_datetime_format=True)
+    with pytest.raises(Exception):
+        pps.score(df, "Age", "Pclass_datetime")
+
+    assert _infer_task(df, "Survived", "Age") == "regression"
+
+
+def test__maybe_sample():
+    from ppscore.calculation import _maybe_sample
+
+    df = pd.read_csv("examples/titanic.csv")
+    assert len(_maybe_sample(df, 10)) == 10
+
+
+def test_score():
+    df = pd.DataFrame()
+    df["x"] = np.random.uniform(-2, 2, 1_000)
+    df["error"] = np.random.uniform(-0.5, 0.5, 1_000)
+    df["y"] = df["x"] * df["x"] + df["error"]
+
+    df["constant"] = 1
+    df = df.reset_index()
+    df["id"] = df["index"].astype(str)
+
+    df["x_greater_0"] = df["x"] > 0
+    df["x_greater_0"] = df["x_greater_0"].astype(str)
+
+    df["nan"] = np.nan
+    with pytest.raises(Exception):
+        pps.score(df, "nan", "y")
+
+    assert pps.score(df, "x", "y", "regression")["task"] == "regression"
+
+    assert pps.score(df, "x", "constant")["task"] == "predict_constant"
+    assert pps.score(df, "x", "x")["task"] == "predict_itself"
+    assert pps.score(df, "x", "id")["task"] == "predict_id"
+
+    # feature is id
+    assert pps.score(df, "id", "y")["ppscore"] == 0
+
+    # numeric feature and target
+    assert pps.score(df, "x", "y")["ppscore"] > 0.5
+    assert pps.score(df, "y", "x")["ppscore"] < 0.05
+
+    # object feature or target
+    assert pps.score(df, "x", "x_greater_0")["ppscore"] > 0.6
+    assert pps.score(df, "x_greater_0", "x")["ppscore"] < 0.6
+
+
+def test_matrix():
+    df = pd.read_csv("examples/titanic.csv")
+    df = df[["Age", "Survived"]]
+
+    assert isinstance(pps.matrix(df), pd.DataFrame)
+    assert isinstance(pps.matrix(df, output="dict"), dict)
+
+    # matrix catches single score errors under the hood
+    df["Age_datetime"] = pd.to_datetime(df["Age"], infer_datetime_format=True)
+    assert pps.matrix(df[["Survived", "Age_datetime"]])["Survived"]["Age_datetime"] == 0
diff --git a/tests/test_skeleton.py b/tests/test_skeleton.py