Skip to content

Commit

Permalink
Errors: check inputs and raise errors
Browse files Browse the repository at this point in the history
  • Loading branch information
FlorianWetschoreck committed Jul 21, 2020
1 parent 7b9ee67 commit caf8745
Show file tree
Hide file tree
Showing 2 changed files with 76 additions and 1 deletion.
38 changes: 38 additions & 0 deletions src/ppscore/calculation.py
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,19 @@ def score(df, x, y, task=None, sample=5000):
The dict enables introspection into the calculations that have been performed under the hood
"""

if not isinstance(df, pd.DataFrame):
raise TypeError(
f"The 'df' argument should be a pandas.DataFrame but you passed a {type(df)}\nPlease convert your input to a pandas.DataFrame"
)
if not x in df.columns:
raise ValueError(
f"The 'x' argument should be the name of a dataframe column but the name that you passed ({x}) is not a column in the given dataframe.\nPlease review the column name or your dataframe"
)
if not y in df.columns:
raise ValueError(
f"The 'y' argument should be the name of a dataframe column but the name that you passed ({y}) is not a column in the given dataframe.\nPlease review the column name or your dataframe"
)

if x == y:
task_name = "predict_itself"
else:
Expand Down Expand Up @@ -345,6 +358,23 @@ def predictors(df, y, output="df", sorted=True, **kwargs):
Either returns a df or a list of all the PPS dicts. This can be influenced
by the output argument
"""
if not isinstance(df, pd.DataFrame):
raise TypeError(
f"The 'df' argument should be a pandas.DataFrame but you passed a {type(df)}\nPlease convert your input to a pandas.DataFrame"
)
if not y in df.columns:
raise ValueError(
f"The 'y' argument should be the name of a dataframe column but the name that you passed ({y}) is not a column in the given dataframe.\nPlease review the column name or your dataframe"
)
if not output in ["df", "list"]:
raise ValueError(
f"""The 'output' argument should be one of ["df", "list"] but you passed: {output}\nPlease adjust your input to one of the valid values"""
)
if not sorted in [True, False]:
raise ValueError(
f"""The 'sorted' argument should be one of [True, False] but you passed: {sorted}\nPlease adjust your input to one of the valid values"""
)

scores = [score(df, column, y, **kwargs) for column in df if column != y]

if sorted:
Expand Down Expand Up @@ -384,6 +414,14 @@ def matrix(df, output="df", **kwargs):
pandas.DataFrame or Dict
Either returns a df or a dict with all the PPS dicts arranged by the target column. This can be influenced by the output argument
"""
if not isinstance(df, pd.DataFrame):
raise TypeError(
f"The 'df' argument should be a pandas.DataFrame but you passed a {type(df)}\nPlease convert your input to a pandas.DataFrame"
)
if not output in ["df", "dict"]:
raise ValueError(
f"""The 'output' argument should be one of ["df", "dict"] but you passed: {output}\nPlease adjust your input to one of the valid values"""
)
data = {}
columns = list(df.columns)

Expand Down
39 changes: 38 additions & 1 deletion tests/test_calculation.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,19 @@ def test_score():
df["x_greater_0_boolean_category"] = df["x_greater_0_boolean"].astype("category")

df["nan"] = np.nan
with pytest.raises(Exception):

# check input types
with pytest.raises(TypeError):
numpy_array = np.random.randn(10,10) # not a DataFrame
pps.score(numpy_array, "x", "y")

with pytest.raises(ValueError):
pps.score(df, "x_column_that_does_not_exist", "y")

with pytest.raises(ValueError):
pps.score(df, "x", "y_column_that_does_not_exist")

with pytest.raises(Exception): # After dropping missing values, there are no valid rows left
pps.score(df, "nan", "y")

assert pps.score(df, "x", "y", "regression")["task"] == "regression"
Expand Down Expand Up @@ -126,6 +138,21 @@ def test_predictors():
df = pd.read_csv("examples/titanic.csv")
df = df[["Age", y]]

# check input types
with pytest.raises(TypeError):
numpy_array = np.random.randn(10,10) # not a DataFrame
pps.predictors(numpy_array, y)

with pytest.raises(ValueError):
pps.predictors(df, "y_column_that_does_not_exist")

with pytest.raises(ValueError):
pps.predictors(df, y, output="invalid_output_type")

with pytest.raises(ValueError):
pps.predictors(df, y, sorted="invalid_value_for_sorted")

# check return types
result_df = pps.predictors(df, y)
assert isinstance(result_df, pd.DataFrame)
assert not y in result_df.index
Expand All @@ -134,11 +161,21 @@ def test_predictors():
assert isinstance(list_of_dicts, list)
assert isinstance(list_of_dicts[0], dict)

# the underlying calculations are tested as part of test_score

def test_matrix():
df = pd.read_csv("examples/titanic.csv")
df = df[["Age", "Survived"]]

# check input types
with pytest.raises(TypeError):
numpy_array = np.random.randn(10,10) # not a DataFrame
pps.matrix(numpy_array)

with pytest.raises(ValueError):
pps.matrix(df, output="invalid_output_type")

# check return types
assert isinstance(pps.matrix(df), pd.DataFrame)
assert isinstance(pps.matrix(df, output="dict"), dict)

Expand Down

0 comments on commit caf8745

Please sign in to comment.