Merge branch 'master' into fix_create_api
tvdboom committed Aug 30, 2022
2 parents aaf657a + 60869f7 commit e2c8b95
Showing 8 changed files with 42 additions and 36 deletions.
2 changes: 1 addition & 1 deletion pycaret/classification/oop.py
@@ -923,7 +923,7 @@ def setup(
         if imputation_type == "simple":
             container.append(["Numeric imputation", numeric_imputation])
             container.append(["Categorical imputation", categorical_imputation])
-        else:
+        elif imputation_type == "iterative":
             if isinstance(numeric_iterative_imputer, str):
                 num_imputer = numeric_iterative_imputer
             else:
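
With the `elif`, the iterative-imputer settings are only added to the setup summary when iterative imputation is actually requested, so `imputation_type=None` no longer falls into the old bare `else:` branch. A minimal sketch (the "juice" demo dataset and `session_id` are only for illustration; assumes `imputation_type=None` is accepted to disable imputation in this version):

```python
import pycaret.classification as pcc
from pycaret.datasets import get_data

data = get_data("juice")

# Sketch only: with imputation_type=None the printed setup summary should skip the
# imputation rows instead of walking the iterative-imputer logging branch.
exp = pcc.setup(data, imputation_type=None, session_id=123)
```
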
2 changes: 1 addition & 1 deletion pycaret/internal/preprocess/preprocessor.py
@@ -615,7 +615,7 @@ def _encoding(self, max_encoding_ohe, encoding_method, rare_to_value, rare_value
 
         # Select columns for different encoding types
         one_hot_cols, rest_cols = [], []
-        for name, column in X_transformed.items():
+        for name, column in X_transformed[self._fxs["Categorical"]].items():
             n_unique = column.nunique()
             if n_unique == 2:
                 self._fxs["Ordinal"][name] = list(sorted(column.dropna().unique()))
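
Restricting the loop to `X_transformed[self._fxs["Categorical"]]` means encoder selection only inspects the declared categorical features. A hedged sketch (column names invented; `_fxs` is a pycaret-internal attribute read here purely for demonstration, and exact type inference may differ by version):

```python
import pandas as pd
import pycaret.classification as pcc

df = pd.DataFrame(
    {
        "flag": [0, 1] * 50,                            # numeric, only two unique values
        "color": ["red", "blue", "green", "red"] * 25,  # genuinely categorical
        "target": [0, 1, 1, 0] * 25,
    }
)
exp = pcc.setup(df, target="target", session_id=123)

# After the fix, a binary numeric column such as "flag" is no longer expected to be
# registered as an ordinal feature to encode (assuming it stays typed as numeric).
print(exp._fxs["Ordinal"])
```
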
6 changes: 4 additions & 2 deletions pycaret/internal/preprocess/transformers.py
@@ -117,7 +117,7 @@ def _reorder_cols(self, df, original_df):
         for col in df:
             if col in original_df and col not in self._include:
                 raise ValueError(
-                    f"Column '{col}' returned by the transformer "
+                    f"Column '{col}' returned by transformer {self.transformer} "
                     "already exists in the original dataset."
                 )
 
@@ -387,7 +387,9 @@ def fit(self, X, y=None):
 
     def transform(self, X, y=None):
         if not self.group_names:
-            self.group_names = [f"group_{i}" for i in range(len(self.group_features))]
+            self.group_names = [
+                f"group_{i}" for i in range(1, len(self.group_features) + 1)
+            ]
 
         for name, group in zip(self.group_names, self.group_features):
            # Drop columns that are not in the dataframe (can be excluded)
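
The renaming itself is easy to see in isolation (plain Python, no pycaret API involved): auto-generated group names now count from 1 instead of 0.

```python
# Before/after comparison of the auto-naming expression for three feature groups.
group_features = [["a1", "a2"], ["b1", "b2"], ["c1", "c2"]]

old_names = [f"group_{i}" for i in range(len(group_features))]
new_names = [f"group_{i}" for i in range(1, len(group_features) + 1)]

print(old_names)  # ['group_0', 'group_1', 'group_2']
print(new_names)  # ['group_1', 'group_2', 'group_3']
```
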
60 changes: 31 additions & 29 deletions pycaret/internal/pycaret_experiment/supervised_experiment.py
@@ -4840,11 +4840,13 @@ def predict_model(
 
         def replace_labels_in_column(pipeline, labels: pd.Series) -> pd.Series:
             # Check if there is a LabelEncoder in the pipeline
-            name = labels.name
-            index = labels.index
             le = get_label_encoder(pipeline)
             if le:
-                return pd.Series(le.inverse_transform(labels), name=name, index=index)
+                return pd.Series(
+                    data=le.inverse_transform(labels),
+                    name=labels.name,
+                    index=labels.index,
+                )
             else:
                 return labels
 
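
The refactored helper is behaviourally equivalent to the old one-liner. A standalone sketch with plain scikit-learn (class labels, Series name, and index values are made up) of what it does: map encoded predictions back to the original labels while preserving the Series name and index.

```python
import pandas as pd
from sklearn.preprocessing import LabelEncoder

le = LabelEncoder().fit(["CH", "MM"])
labels = pd.Series([0, 1, 1, 0], name="prediction_label", index=[10, 11, 12, 13])

decoded = pd.Series(
    data=le.inverse_transform(labels),
    name=labels.name,
    index=labels.index,
)
print(decoded)  # CH/MM values with the same name and index as the encoded input
```
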
@@ -5039,27 +5041,31 @@ def replace_labels_in_column(pipeline, labels: pd.Series) -> pd.Series:
 
         if score is not None:
             pred = pred.astype(int)
+
             if not raw_score:
-                score = [s[pred[i]] for i, s in enumerate(score)]
-            try:
-                score = pd.DataFrame(score, index=X_test_.index)
-                if raw_score:
-                    score_columns = pd.Series(
-                        range(score.shape[1]), index=X_test_.index
-                    )
-                    if not encoded_labels:
-                        score_columns = replace_labels_in_column(
-                            pipeline, score_columns
-                        )
-                    score.columns = [f"{SCORE_COLUMN}_{l}" for l in score_columns]
+                score = pd.DataFrame(
+                    data=[s[pred[i]] for i, s in enumerate(score)],
+                    index=X_test_.index,
+                    columns=[SCORE_COLUMN],
+                )
+            else:
+                if not encoded_labels:
+                    le = get_label_encoder(pipeline)
+                    if le:
+                        columns = le.classes_
+                    else:
+                        columns = range(score.shape[1])
                 else:
-                    score.columns = [SCORE_COLUMN]
-                score = score.round(round)
-                old_index = X_test_.index
-                X_test_ = pd.concat((X_test_, score), axis=1)
-                X_test_.index = old_index
-            except:
-                pass
+                    columns = range(score.shape[1])
+
+                score = pd.DataFrame(
+                    data=score,
+                    index=X_test_.index,
+                    columns=[f"{SCORE_COLUMN}_{l}" for l in columns],
+                )
+
+            score = score.round(round)
+            X_test_ = pd.concat((X_test_, score), axis=1)
 
         # store predictions on hold-out in display_container
         if df_score is not None:
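
For callers, the main visible effect is in `predict_model(..., raw_score=True)`: the per-class probability columns are now labelled with the decoded class names taken from the pipeline's LabelEncoder, and the silent `try/except: pass` around the concat is gone. A hedged end-to-end sketch (dataset, model, and column names are illustrative; assumes `SCORE_COLUMN` is "prediction_score"):

```python
import pycaret.classification as pcc
from pycaret.datasets import get_data

data = get_data("juice")
exp = pcc.setup(data, session_id=123)
model = pcc.create_model("lr")

preds = pcc.predict_model(model, raw_score=True)
# Expected to contain columns like "prediction_score_CH" / "prediction_score_MM"
# (the juice target classes) rather than "_0" / "_1".
print([c for c in preds.columns if c.startswith("prediction_score")])
```
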
@@ -5431,20 +5437,16 @@ def create_app(self, estimator, app_kwargs: Optional[dict]):
         all_inputs = []
         app_kwargs = app_kwargs or {}
 
-        data_without_target = self.X[list(self.X_train_transformed.columns)]
-
-        for i in data_without_target.columns:
+        for i in self.X.columns:
             if i in self._fxs["Categorical"] or i in self._fxs["Ordinal"]:
-                all_inputs.append(
-                    gr.inputs.Dropdown(list(data_without_target[i].unique()), label=i)
-                )
+                all_inputs.append(gr.inputs.Dropdown(list(self.X[i].unique()), label=i))
             else:
                 all_inputs.append(gr.inputs.Textbox(label=i))
 
         def predict(*dict_input):
 
             input_df = pd.DataFrame.from_dict([dict_input])
-            input_df.columns = list(data_without_target.columns)
+            input_df.columns = list(self.X.columns)
             return (
                 self.predict_model(
                     estimator, data=input_df, **self._create_app_predict_kwargs
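
`create_app` wraps the fitted pipeline in a small gradio interface; after this change the input widgets are built from `self.X` (the raw feature columns) rather than from the transformed training columns. A minimal usage sketch (requires the optional gradio dependency; dataset and model are illustrative):

```python
import pycaret.classification as pcc
from pycaret.datasets import get_data

data = get_data("juice")
exp = pcc.setup(data, session_id=123)
model = pcc.create_model("lr")

# Launches a gradio app with one Dropdown (categorical/ordinal) or Textbox (numeric)
# input per raw feature column.
pcc.create_app(model)
```
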
2 changes: 1 addition & 1 deletion pycaret/regression/oop.py
@@ -882,7 +882,7 @@ def setup(
         if imputation_type == "simple":
             container.append(["Numeric imputation", numeric_imputation])
             container.append(["Categorical imputation", categorical_imputation])
-        else:
+        elif imputation_type == "iterative":
             if isinstance(numeric_iterative_imputer, str):
                 num_imputer = numeric_iterative_imputer
             else:
2 changes: 2 additions & 0 deletions requirements-optional.txt
@@ -6,6 +6,7 @@ pandas-profiling>=3.1.0
 explainerdashboard>=0.3.8 # For dashboard method
 autoviz>=0.1.36 # For EDA method
 fairlearn>=0.7.0 # For check_fairness method
+# deepchecks>=0.8.2 # For deep_check method TODO: Add when compatible with plotly
 
 # Models
 xgboost>=1.1.0
@@ -26,6 +27,7 @@ scikit-optimize>=0.9.0
 # MLOps
 mlflow>=1.24.0
 gradio>=2.8.10
+boto3>=1.24.56 # For deploy_model method
 fastapi>=0.75.0 # For web api
 uvicorn>=0.17.6 # For web api
 m2cgen>=0.9.0 # For model conversion
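
boto3 is pulled in for `deploy_model`, which pushes a trained model to cloud storage. A hedged sketch of the documented AWS flow (bucket name and model name are placeholders; AWS credentials must be configured separately for boto3):

```python
import pycaret.classification as pcc
from pycaret.datasets import get_data

data = get_data("juice")
exp = pcc.setup(data, session_id=123)
model = pcc.create_model("lr")

# Uploads the trained pipeline to the given S3 bucket (placeholder name).
pcc.deploy_model(
    model,
    model_name="juice-lr-demo",
    platform="aws",
    authentication={"bucket": "my-example-bucket"},
)
```
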
2 changes: 1 addition & 1 deletion requirements.txt
@@ -16,7 +16,7 @@ numba~=0.55.0
 requests>=2.27.1 # Required by pycaret.datasets
 psutil>=5.9.0
 markupsafe>=2.0.1 # Fixes Google Colab issue
-importlib_metadata
+importlib_metadata>=4.12.0
 
 # Plotting
 matplotlib>=3.3.0
2 changes: 1 addition & 1 deletion tests/test_preprocess.py
@@ -241,7 +241,7 @@ def test_encoding_grouping_rare_categories():
     data = pycaret.datasets.get_data("juice")
     pc = pycaret.classification.setup(data, rare_to_value=0.5)
     X, _ = pc.pipeline.transform(pc.X, pc.y)
-    assert "rare" in pc.pipeline.steps[-4][1].transformer.mapping[0]["mapping"]
+    assert "rare" in pc.pipeline.steps[-2][1].transformer.mapping[0]["mapping"]
 
 
 def test_encoding_categorical_features():
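
The assertion now points at `steps[-2]`, presumably because the pipeline's step layout changed on master. A hedged sketch of a less position-dependent way to inspect the fitted pipeline when writing such tests (step names are pycaret-internal and may vary between versions):

```python
import pycaret.classification
import pycaret.datasets

data = pycaret.datasets.get_data("juice")
pc = pycaret.classification.setup(data, rare_to_value=0.5)

# List the (name, transformer) pairs instead of relying on a hard-coded index.
for name, step in pc.pipeline.steps:
    print(name, type(step).__name__)
```
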
