Skip to content

Commit

Permalink
add tests
Browse files Browse the repository at this point in the history
  • Loading branch information
FlorianWetschoreck committed Apr 9, 2020
1 parent e61c7d6 commit 479e23f
Show file tree
Hide file tree
Showing 5 changed files with 117 additions and 49 deletions.
2 changes: 1 addition & 1 deletion examples/quadrativ_effect_on_1mio_rows.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ def heatmap(df):
df = pd.DataFrame()
df["x"] = np.random.uniform(-2, 2, 1_000_000)
df["error"] = np.random.uniform(-0.5, 0.5, 1_000_000)
df["y"] = df["x"]*df["x"] + df["error"]
df["y"] = df["x"] * df["x"] + df["error"]

# %%
sns.scatterplot(x="x", y="y", data=df.sample(10_000))
Expand Down
2 changes: 1 addition & 1 deletion src/ppscore/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
# Change here if project is renamed and does not equal the package name
dist_name = __name__
__version__ = get_distribution(dist_name).version
except DistributionNotFound:
except DistributionNotFound: # pragma: no cover
__version__ = "unknown"
finally:
del get_distribution, DistributionNotFound
Expand Down
41 changes: 10 additions & 31 deletions src/ppscore/calculation.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
# %%
from sklearn import tree
from sklearn import preprocessing
from sklearn.model_selection import cross_val_score
Expand Down Expand Up @@ -30,7 +29,6 @@
NUMERIC_AS_CATEGORIC_BREAKPOINT = 15


# %%
# https://scikit-learn.org/stable/modules/tree.html

# https://scikit-learn.org/stable/modules/cross_validation.html
Expand Down Expand Up @@ -60,29 +58,24 @@ def _calculate_model_cv_score_(df, target, feature, metric, model, **kwargs):
# reshaping needed because there is only 1 feature
feature_df = df[feature].values.reshape(-1, 1)

# Crossvalidation is stratifiedKFold for classification, KFold for regression
scores = cross_val_score(
model, feature_df, target_series, cv=CV_ITERATIONS, scoring=metric
)

# Crossvalidation is stratifiedKFold for classification, KFold for regression
return scores.mean()


# %%

# %%
def _normalized_mae_score(model_mae, naive_mae):
# 10, 5 >> 0 because worse than naive
# 10, 20 >> 0.5
# 5, 20 >> 0.75 = 1 - mae/base_mae
if model_mae > naive_mae:
# maybe add warning?
return 0
else:
return 1 - (model_mae / naive_mae)


# %%
def _mae_normalizer(df, y, model_score):
df["naive"] = df[y].mean()
baseline_score = mean_absolute_error(df[y], df["naive"]) # true, pred
Expand All @@ -91,7 +84,6 @@ def _mae_normalizer(df, y, model_score):
return ppscore, baseline_score


# %%
def _normalized_f1_score(model_f1, baseline_f1):
## F1 ranges from 0 to 1
## 1 is best
Expand All @@ -106,7 +98,6 @@ def _normalized_f1_score(model_f1, baseline_f1):
return f1_diff / scale_range # 0.1/0.3 = 0.33


# %%
def _f1_normalizer(df, y, model_score):
df["naive"] = df[y].value_counts().index[0]
baseline_score = f1_score(df[y], df["naive"], average="weighted")
Expand All @@ -115,7 +106,6 @@ def _f1_normalizer(df, y, model_score):
return ppscore, baseline_score


# %%
TASKS = {
"regression": {
"metric_name": "mean absolute error",
Expand Down Expand Up @@ -150,14 +140,11 @@ def _f1_normalizer(df, y, model_score):
}


# %%
def _infer_task(df, x, y):
if x == y:
return "predict_itself"

category_count = df[y].value_counts().count()
if category_count == 0:
raise Exception(f"The target column {y} does not have valid values.")
if category_count == 1:
return "predict_constant"
if category_count == 2:
Expand All @@ -183,10 +170,9 @@ def _infer_task(df, x, y):

raise Exception(
f"Could not infer a valid task based on the target {y}. The dtype {df[y].dtype} is not yet supported"
)
) # pragma: no cover


# %%
def _feature_is_id(df, x):
if not (is_string_dtype(df[x]) or is_categorical_dtype(df[x])):
return False
Expand All @@ -195,7 +181,6 @@ def _feature_is_id(df, x):
return category_count == len(df[x])


# %%
def _maybe_sample(df, sample):
if sample and len(df) > sample:
# this is a problem if x or y have more than sample=5000 categories
Expand All @@ -204,24 +189,25 @@ def _maybe_sample(df, sample):
return df


# %%
def score(df, x, y, task=None, sample=5000):
# TODO: log.warning when values have been dropped
df = df[[x, y]].dropna()
if len(df) == 0:
raise Exception("After dropping missing values, there are no valid rows left")
df = _maybe_sample(df, sample)

if task is not None:
task_name = task
else:
if task is None:
task_name = _infer_task(df, x, y)
else:
task_name = task

task = TASKS[task_name]

if task_name in ["predict_constant", "predict_itself"]:
model_score = 1
ppscore = 1
baseline_score = 1
elif task_name == "predict_id":
elif task_name == "predict_id": # target is id
model_score = 0
ppscore = 0
baseline_score = 0
Expand All @@ -247,11 +233,10 @@ def score(df, x, y, task=None, sample=5000):
}


# %%
# def predictors(df, y, task=None, sorted=True):
# pass

# %%

def matrix(df, output="df", **kwargs):
data = {}
columns = list(df.columns)
Expand All @@ -272,11 +257,5 @@ def matrix(df, output="df", **kwargs):
matrix = pd.DataFrame.from_dict(data, orient="index")
matrix.columns = columns
return matrix
else:
else: # output == "dict"
return data


# %%
# matrix(df)

# %%
105 changes: 105 additions & 0 deletions tests/test_calculation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
# -*- coding: utf-8 -*-

import pytest
import pandas as pd
import numpy as np

import ppscore as pps


def test__normalized_f1_score():
    """Check ``_normalized_f1_score`` for a model below and above the baseline."""
    from ppscore.calculation import _normalized_f1_score

    # ((model_f1, baseline_f1), expected normalized score)
    expectations = [
        ((0.4, 0.5), 0),
        ((0.75, 0.5), 0.5),
    ]
    for (model_f1, baseline_f1), expected in expectations:
        assert _normalized_f1_score(model_f1, baseline_f1) == expected


def test__normalized_mae_score():
    """Check ``_normalized_mae_score`` for a model worse and better than naive."""
    from ppscore.calculation import _normalized_mae_score

    # ((model_mae, naive_mae), expected normalized score)
    expectations = [
        ((10, 5), 0),
        ((5, 10), 0.5),
    ]
    for (model_mae, naive_mae), expected in expectations:
        assert _normalized_mae_score(model_mae, naive_mae) == expected


def test__infer_task():
    """Walk through the task names ``_infer_task`` returns on the Titanic data.

    The checks are kept in the same order as the branches of the
    implementation; later checks rely on columns added by earlier steps.
    """
    from ppscore.calculation import _infer_task

    titanic = pd.read_csv("examples/titanic.csv")

    # feature and target are the same column
    assert _infer_task(titanic, "Age", "Age") == "predict_itself"

    # target holds a single value
    titanic["constant"] = 1
    assert _infer_task(titanic, "Age", "constant") == "predict_constant"

    assert _infer_task(titanic, "Age", "Survived") == "classification"

    # string target built from the row index
    titanic = titanic.reset_index()
    titanic["id"] = titanic["index"].astype(str)
    assert _infer_task(titanic, "Age", "id") == "predict_id"

    # classification because numeric but few categories
    assert _infer_task(titanic, "Age", "SibSp") == "classification"

    # categorical dtype target
    titanic["Pclass_category"] = titanic["Pclass"].astype("category")
    assert _infer_task(titanic, "Age", "Pclass_category") == "classification"

    # datetime target: scoring is expected to raise
    titanic["Pclass_datetime"] = pd.to_datetime(
        titanic["Pclass"], infer_datetime_format=True
    )
    with pytest.raises(Exception):
        pps.score(titanic, "Age", "Pclass_datetime")

    assert _infer_task(titanic, "Survived", "Age") == "regression"


def test__maybe_sample():
    """``_maybe_sample`` reduces the Titanic frame to the requested row count."""
    from ppscore.calculation import _maybe_sample

    titanic = pd.read_csv("examples/titanic.csv")
    sampled = _maybe_sample(titanic, 10)
    assert len(sampled) == 10


def test_score():
    """Exercise ``pps.score`` on a synthetic quadratic relationship.

    Builds ``y = x * x + error`` plus a constant column, a string id column,
    a string flag for ``x > 0`` and an all-NaN column, then checks the
    reported task names and the ppscore thresholds.
    """
    # Seeded generator: the threshold assertions below depend on the drawn
    # values, so the data must be identical on every run.
    rng = np.random.default_rng(0)

    df = pd.DataFrame()
    df["x"] = rng.uniform(-2, 2, 1_000)
    df["error"] = rng.uniform(-0.5, 0.5, 1_000)
    df["y"] = df["x"] * df["x"] + df["error"]

    df["constant"] = 1
    df = df.reset_index()
    df["id"] = df["index"].astype(str)

    df["x_greater_0"] = df["x"] > 0
    df["x_greater_0"] = df["x_greater_0"].astype(str)

    # an all-NaN feature: scoring is expected to raise
    df["nan"] = np.nan
    with pytest.raises(Exception):
        pps.score(df, "nan", "y")

    # explicitly passed task name is reported back
    assert pps.score(df, "x", "y", "regression")["task"] == "regression"

    assert pps.score(df, "x", "constant")["task"] == "predict_constant"
    assert pps.score(df, "x", "x")["task"] == "predict_itself"
    assert pps.score(df, "x", "id")["task"] == "predict_id"

    # feature is id
    assert pps.score(df, "id", "y")["ppscore"] == 0

    # numeric feature and target
    assert pps.score(df, "x", "y")["ppscore"] > 0.5
    assert pps.score(df, "y", "x")["ppscore"] < 0.05

    # object feature or target
    assert pps.score(df, "x", "x_greater_0")["ppscore"] > 0.6
    assert pps.score(df, "x_greater_0", "x")["ppscore"] < 0.6


def test_matrix():
    """Check the output types of ``pps.matrix`` and a column whose score fails."""
    df = pd.read_csv("examples/titanic.csv")
    # .copy() gives an independent frame, so the column assignment further
    # down does not write to a selection of the CSV frame
    # (avoids pandas' SettingWithCopyWarning).
    df = df[["Age", "Survived"]].copy()

    assert isinstance(pps.matrix(df), pd.DataFrame)
    assert isinstance(pps.matrix(df, output="dict"), dict)

    # matrix catches single score errors under the hood
    df["Age_datetime"] = pd.to_datetime(df["Age"], infer_datetime_format=True)
    assert pps.matrix(df[["Survived", "Age_datetime"]])["Survived"]["Age_datetime"] == 0
16 changes: 0 additions & 16 deletions tests/test_skeleton.py

This file was deleted.

0 comments on commit 479e23f

Please sign in to comment.