Errors: check inputs and raise errors

valteresj2 · Jul 21, 2020 · caf8745 · caf8745
1 parent 7b9ee67
commit caf8745
Show file tree

Hide file tree

Showing 2 changed files with 76 additions and 1 deletion.
diff --git a/src/ppscore/calculation.py b/src/ppscore/calculation.py
@@ -273,6 +273,19 @@ def score(df, x, y, task=None, sample=5000):
         The dict enables introspection into the calculations that have been performed under the hood
     """
 
+    if not isinstance(df, pd.DataFrame):
+        raise TypeError(
+            f"The 'df' argument should be a pandas.DataFrame but you passed a {type(df)}\nPlease convert your input to a pandas.DataFrame"
+        )
+    if not x in df.columns:
+        raise ValueError(
+            f"The 'x' argument should be the name of a dataframe column but the name that you passed ({x}) is not a column in the given dataframe.\nPlease review the column name or your dataframe"
+        )
+    if not y in df.columns:
+        raise ValueError(
+            f"The 'y' argument should be the name of a dataframe column but the name that you passed ({y}) is not a column in the given dataframe.\nPlease review the column name or your dataframe"
+        )
+
     if x == y:
         task_name = "predict_itself"
     else:
@@ -345,6 +358,23 @@ def predictors(df, y, output="df", sorted=True, **kwargs):
         Either returns a df or a list of all the PPS dicts. This can be influenced
         by the output argument
     """
+    if not isinstance(df, pd.DataFrame):
+        raise TypeError(
+            f"The 'df' argument should be a pandas.DataFrame but you passed a {type(df)}\nPlease convert your input to a pandas.DataFrame"
+        )
+    if not y in df.columns:
+        raise ValueError(
+            f"The 'y' argument should be the name of a dataframe column but the name that you passed ({y}) is not a column in the given dataframe.\nPlease review the column name or your dataframe"
+        )
+    if not output in ["df", "list"]:
+        raise ValueError(
+            f"""The 'output' argument should be one of ["df", "list"] but you passed: {output}\nPlease adjust your input to one of the valid values"""
+        )
+    if not sorted in [True, False]:
+        raise ValueError(
+            f"""The 'sorted' argument should be one of [True, False] but you passed: {sorted}\nPlease adjust your input to one of the valid values"""
+        )
+
     scores = [score(df, column, y, **kwargs) for column in df if column != y]
 
     if sorted:
@@ -384,6 +414,14 @@ def matrix(df, output="df", **kwargs):
     pandas.DataFrame or Dict
         Either returns a df or a dict with all the PPS dicts arranged by the target column. This can be influenced by the output argument
     """
+    if not isinstance(df, pd.DataFrame):
+        raise TypeError(
+            f"The 'df' argument should be a pandas.DataFrame but you passed a {type(df)}\nPlease convert your input to a pandas.DataFrame"
+        )
+    if not output in ["df", "dict"]:
+        raise ValueError(
+            f"""The 'output' argument should be one of ["df", "dict"] but you passed: {output}\nPlease adjust your input to one of the valid values"""
+        )
     data = {}
     columns = list(df.columns)
 

diff --git a/tests/test_calculation.py b/tests/test_calculation.py
@@ -80,7 +80,19 @@ def test_score():
     df["x_greater_0_boolean_category"] = df["x_greater_0_boolean"].astype("category")
 
     df["nan"] = np.nan
-    with pytest.raises(Exception):
+
+    # check input types
+    with pytest.raises(TypeError):
+        numpy_array = np.random.randn(10,10)  # not a DataFrame
+        pps.score(numpy_array, "x", "y")
+
+    with pytest.raises(ValueError):
+        pps.score(df, "x_column_that_does_not_exist", "y")
+
+    with pytest.raises(ValueError):
+        pps.score(df, "x", "y_column_that_does_not_exist")
+
+    with pytest.raises(Exception):  # After dropping missing values, there are no valid rows left
         pps.score(df, "nan", "y")
 
     assert pps.score(df, "x", "y", "regression")["task"] == "regression"
@@ -126,6 +138,21 @@ def test_predictors():
     df = pd.read_csv("examples/titanic.csv")
     df = df[["Age", y]]
 
+    # check input types
+    with pytest.raises(TypeError):
+        numpy_array = np.random.randn(10,10)  # not a DataFrame
+        pps.predictors(numpy_array, y)
+
+    with pytest.raises(ValueError):
+        pps.predictors(df, "y_column_that_does_not_exist")
+
+    with pytest.raises(ValueError):
+        pps.predictors(df, y, output="invalid_output_type")
+
+    with pytest.raises(ValueError):
+        pps.predictors(df, y, sorted="invalid_value_for_sorted")
+
+    # check return types
     result_df = pps.predictors(df, y)
     assert isinstance(result_df, pd.DataFrame)
     assert not y in result_df.index
@@ -134,11 +161,21 @@ def test_predictors():
     assert isinstance(list_of_dicts, list)
     assert isinstance(list_of_dicts[0], dict)
 
+    # the underlying calculations are tested as part of test_score
 
 def test_matrix():
     df = pd.read_csv("examples/titanic.csv")
     df = df[["Age", "Survived"]]
 
+    # check input types
+    with pytest.raises(TypeError):
+        numpy_array = np.random.randn(10,10)  # not a DataFrame
+        pps.matrix(numpy_array)
+
+    with pytest.raises(ValueError):
+        pps.matrix(df, output="invalid_output_type")
+
+    # check return types
     assert isinstance(pps.matrix(df), pd.DataFrame)
     assert isinstance(pps.matrix(df, output="dict"), dict)