tests to tmp_file

dr-data · Jun 29, 2022 · 1c2cd3a
1 parent 7dba60b
commit 1c2cd3a
Show file tree

Hide file tree

Showing 20 changed files with 1,913 additions and 4,666 deletions.
diff --git a/pycaret/internal/pipeline.py b/pycaret/internal/pipeline.py
@@ -83,6 +83,10 @@ def __init__(self, steps, *, memory=None, verbose=False):
         self._memory_fit = memory.cache(_fit_transform_one)
         self._memory_transform = memory.cache(_transform_one)
 
+    def __getattr__(self, name: str):
+        # override getattr to allow grabbing of final estimator attrs
+        return getattr(self._final_estimator, name)
+
     @property
     def feature_names_in_(self):
         return self._feature_names_in
@@ -218,10 +222,6 @@ def score(self, X, y, sample_weight=None):
 
         return self.steps[-1][-1].score(X, y, sample_weight=sample_weight)
 
-    def __getattr__(self, name: str):
-        # override getattr to allow grabbing of final estimator attrs
-        return getattr(self._final_estimator, name)
-
     def _clear_final_estimator_fit_vars(self, all: bool = False):
         vars_to_remove = []
         try:

diff --git a/pycaret/internal/preprocess/transformers.py b/pycaret/internal/preprocess/transformers.py
@@ -169,10 +169,13 @@ def _prepare_df(self, X, out):
 
     def fit(self, X=None, y=None, **fit_params):
         # Save the incoming feature names
+        feature_names_in = []
         if hasattr(X, "columns"):
-            self._feature_names_in = list(X.columns) + (
-                [y.name] if hasattr(y, "name") else []
-            )
+            feature_names_in += list(X.columns)
+        if hasattr(y, "name"):
+            feature_names_in += [y.name]
+        if feature_names_in:
+            self._feature_names_in = feature_names_in
 
         args = []
         transformer_params = signature(self.transformer.fit).parameters

diff --git a/pycaret/internal/pycaret_experiment/pycaret_experiment.py b/pycaret/internal/pycaret_experiment/pycaret_experiment.py
@@ -232,7 +232,8 @@ def get_logs(
             experiment = client.get_experiment(exp_id)
             if experiment is None:
                 raise ValueError(
-                    "No active run found. Check logging parameter in setup or to get logs for inactive run pass experiment_name."
+                    "No active run found. Check logging parameter in setup "
+                    "or to get logs for inactive run pass experiment_name."
                 )
 
             exp_name_log_ = experiment.name
@@ -241,7 +242,8 @@ def get_logs(
             experiment = client.get_experiment_by_name(exp_name_log_)
             if experiment is None:
                 raise ValueError(
-                    "No active run found. Check logging parameter in setup or to get logs for inactive run pass experiment_name."
+                    "No active run found. Check logging parameter in setup "
+                    "or to get logs for inactive run pass experiment_name."
                 )
 
             exp_id = client.get_experiment_by_name(exp_name_log_).experiment_id

diff --git a/pycaret/internal/pycaret_experiment/supervised_experiment.py b/pycaret/internal/pycaret_experiment/supervised_experiment.py
@@ -4727,7 +4727,7 @@ def finalize_model(
 
         if model_only:
             if self._ml_usecase != MLUsecase.TIME_SERIES:
-                return pipeline_final.steps[-1][1]
+                return pipeline_final._final_estimator
             else:
                 return self._get_final_model_from_pipeline(pipeline_final)
 

diff --git a/pycaret/tests/__init__.py b/pycaret/tests/__init__.py
diff --git a/pycaret/tests/conftest.py b/pycaret/tests/conftest.py
@@ -2,18 +2,24 @@
 import pandas as pd  # type: ignore
 import pytest
 
+from mlflow.tracking.client import MlflowClient
 from pycaret.containers.models.time_series import get_all_model_containers
 from pycaret.datasets import get_data
 from pycaret.time_series import TSForecastingExperiment
 from pycaret.utils.time_series import TSExogenousPresent
 
-from .time_series_test_utils import _BLEND_TEST_MODELS
+from time_series_test_utils import _BLEND_TEST_MODELS
 
 #############################
 #### Fixtures Start Here ####
 #############################
 
 
+@pytest.fixture(name="change_test_dir", autouse=True)
+def change_test_dir(tmp_path, monkeypatch):
+    monkeypatch.chdir(tmp_path)
+
+
 @pytest.fixture(scope="session", name="load_pos_data")
 def load_pos_data():
     """Load Pycaret Airline dataset."""
@@ -118,7 +124,7 @@ def load_ts_models(load_setup):
     exp = load_setup
     model_containers = get_all_model_containers(exp)
 
-    from .time_series_test_utils import (  # TODO Put it back once preprocessing supports series as X
+    from time_series_test_utils import (  # TODO Put it back once preprocessing supports series as X
         _BLEND_TEST_MODELS,
     )
 

diff --git a/pycaret/tests/test_classification.py b/pycaret/tests/test_classification.py
@@ -19,12 +19,6 @@ def juice_dataframe():
     return pycaret.datasets.get_data("juice")
 
 
-@pytest.fixture(scope="module")
-def tracking_api():
-    client = MlflowClient()
-    return client
-
-
 @pytest.mark.parametrize("return_train_score", [True, False])
 def test_classification(juice_dataframe, return_train_score):
 
@@ -192,9 +186,7 @@ def test_classification_setup_fails_with_experiment_custom_multiples_inputs(
                 experiment_custom_tags=custom_tag,
             )
 
-    def test_classification_models_with_experiment_custom_tags(
-        self, juice_dataframe, tracking_api
-    ):
+    def test_classification_models_with_experiment_custom_tags(self, juice_dataframe):
         # init setup
         experiment_name = uuid.uuid4().hex
         _ = pycaret.classification.setup(
@@ -213,7 +205,9 @@ def test_classification_models_with_experiment_custom_tags(
         _ = pycaret.classification.compare_models(
             errors="raise", n_select=100, experiment_custom_tags={"pytest": "testing"}
         )[:3]
+
         # get experiment data
+        tracking_api = MlflowClient()
         experiment = [
             e for e in tracking_api.list_experiments() if e.name == experiment_name
         ][0]

diff --git a/pycaret/tests/test_nlp.py b/pycaret/tests/test_nlp.py
@@ -4,10 +4,10 @@
 sys.path.insert(0, os.path.abspath(".."))
 
 import uuid
+from mlflow.tracking import MlflowClient
 
 import pandas as pd
 import pytest
-from mlflow.tracking.client import MlflowClient
 
 import pycaret.datasets
 import pycaret.nlp
@@ -22,12 +22,6 @@ def kiva_dataframe():
     return pycaret.datasets.get_data("kiva")
 
 
-@pytest.fixture(scope="module")
-def tracking_api():
-    client = MlflowClient()
-    return client
-
-
 def test_nlp(kiva_dataframe):
     data = kiva_dataframe.head(1000)
     assert isinstance(data, pd.DataFrame)
@@ -126,7 +120,7 @@ def test_nlp_setup_fails_with_experiment_custom_multiples_inputs(self, custom_ta
                 experiment_custom_tags=custom_tag,
             )
 
-    def test_nlp_setup_with_experiment_custom_tags(self, kiva_dataframe, tracking_api):
+    def test_nlp_setup_with_experiment_custom_tags(self, kiva_dataframe):
         experiment_name = uuid.uuid4().hex
         # init setup
         _ = pycaret.nlp.setup(
@@ -138,7 +132,9 @@ def test_nlp_setup_with_experiment_custom_tags(self, kiva_dataframe, tracking_ap
             experiment_name=experiment_name,
             experiment_custom_tags={"pytest": "testing"},
         )
+
         # get experiment data
+        tracking_api = MlflowClient()
         experiment = [
             e for e in tracking_api.list_experiments() if e.name == experiment_name
         ][0]
@@ -154,9 +150,7 @@ def test_nlp_setup_with_experiment_custom_tags(self, kiva_dataframe, tracking_ap
             "pytest"
         )
 
-    def test_nlp_create_models_with_experiment_custom_tags(
-        self, kiva_dataframe, tracking_api
-    ):
+    def test_nlp_create_models_with_experiment_custom_tags(self, kiva_dataframe):
         experiment_name = uuid.uuid4().hex
         # init setup
         _ = pycaret.nlp.setup(
@@ -170,7 +164,9 @@ def test_nlp_create_models_with_experiment_custom_tags(
         _ = pycaret.nlp.create_model(
             "lda", experiment_custom_tags={"pytest": "testing"}
         )
+
         # get experiment data
+        tracking_api = MlflowClient()
         experiment = [
             e for e in tracking_api.list_experiments() if e.name == experiment_name
         ][0]

diff --git a/pycaret/tests/test_probability_threshold.py b/pycaret/tests/test_probability_threshold.py
@@ -117,13 +117,15 @@ def test_probability_threshold():
 
     # finalize model
     final_best = pycaret.classification.finalize_model(best)
-    assert final_best.probability_threshold == probability_threshold
+    assert isinstance(final_best._final_estimator, CustomProbabilityThresholdClassifier)
+    assert final_best._final_estimator.probability_threshold == probability_threshold
 
     # save model
     pycaret.classification.save_model(best, "best_model_23122019")
 
     # load model
     saved_best = pycaret.classification.load_model("best_model_23122019")
+    assert isinstance(saved_best._final_estimator, CustomProbabilityThresholdClassifier)
     assert saved_best._final_estimator.probability_threshold == probability_threshold
 
     assert 1 == 1

diff --git a/pycaret/tests/test_regression.py b/pycaret/tests/test_regression.py
@@ -6,7 +6,7 @@
 
 import pandas as pd
 import pytest
-from mlflow.tracking.client import MlflowClient
+from mlflow.tracking import MlflowClient
 
 import pycaret.datasets
 import pycaret.regression
@@ -17,12 +17,6 @@ def boston_dataframe():
     return pycaret.datasets.get_data("boston")
 
 
-@pytest.fixture(scope="module")
-def tracking_api():
-    client = MlflowClient()
-    return client
-
-
 @pytest.mark.parametrize("return_train_score", [True, False])
 def test_regression(boston_dataframe, return_train_score):
     # loading dataset
@@ -186,9 +180,7 @@ def test_regression_setup_fails_with_experiment_custom_multiples_inputs(
                 experiment_custom_tags=custom_tag,
             )
 
-    def test_regression_models_with_experiment_custom_tags(
-        self, boston_dataframe, tracking_api
-    ):
+    def test_regression_models_with_experiment_custom_tags(self, boston_dataframe):
         # init setup
         experiment_name = uuid.uuid4().hex
         _ = pycaret.regression.setup(
@@ -203,7 +195,9 @@ def test_regression_models_with_experiment_custom_tags(
         _ = pycaret.regression.compare_models(
             n_select=100, experiment_custom_tags={"pytest": "testing"}
         )[:2]
+
         # get experiment data
+        tracking_api = MlflowClient()
         experiment = [
             e for e in tracking_api.list_experiments() if e.name == experiment_name
         ][0]

diff --git a/pycaret/tests/test_time_series_base.py b/pycaret/tests/test_time_series_base.py
@@ -8,7 +8,7 @@
 
 from pycaret.time_series import TSForecastingExperiment
 
-from .time_series_test_utils import _return_compare_model_args, _return_model_parameters
+from time_series_test_utils import _return_compare_model_args, _return_model_parameters
 
 pytestmark = pytest.mark.filterwarnings("ignore::UserWarning")
 

diff --git a/pycaret/tests/test_time_series_plots.py b/pycaret/tests/test_time_series_plots.py
@@ -10,7 +10,7 @@
 from pycaret.internal.ensemble import _ENSEMBLE_METHODS
 from pycaret.time_series import TSForecastingExperiment
 
-from .time_series_test_utils import (
+from time_series_test_utils import (
     _ALL_PLOTS_DATA,
     _ALL_PLOTS_ESTIMATOR,
     _ALL_PLOTS_ESTIMATOR_NOT_DATA,

diff --git a/pycaret/tests/test_time_series_preprocess.py b/pycaret/tests/test_time_series_preprocess.py
@@ -8,7 +8,7 @@
 
 from pycaret.time_series import TSForecastingExperiment
 
-from .time_series_test_utils import (
+from time_series_test_utils import (
     _IMPUTE_METHODS_STR,
     _SCALE_METHODS,
     _TRANSFORMATION_METHODS,

diff --git a/pycaret/tests/test_time_series_setup.py b/pycaret/tests/test_time_series_setup.py
@@ -9,7 +9,7 @@
 from pycaret.datasets import get_data
 from pycaret.time_series import TSForecastingExperiment
 
-from .time_series_test_utils import (
+from time_series_test_utils import (
     _get_seasonal_values,
     _get_seasonal_values_alphanumeric,
     _return_setup_args_raises,

diff --git a/pycaret/tests/test_time_series_stats.py b/pycaret/tests/test_time_series_stats.py
@@ -3,7 +3,7 @@
 from pycaret.time_series import TSForecastingExperiment
 from pycaret.utils.time_series.exceptions import MissingDataError
 
-from .time_series_test_utils import (
+from time_series_test_utils import (
     _ALL_DATA_TYPES,
     _ALL_STATS_TESTS,
     _ALL_STATS_TESTS_MISSING_DATA,

diff --git a/pycaret/tests/test_time_series_tune_base.py b/pycaret/tests/test_time_series_tune_base.py
@@ -8,7 +8,7 @@
 
 from pycaret.time_series import TSForecastingExperiment
 
-from .time_series_test_utils import _ALL_METRICS
+from time_series_test_utils import _ALL_METRICS
 
 ##########################
 #### Tests Start Here ####

diff --git a/pycaret/tests/test_time_series_tune_grid.py b/pycaret/tests/test_time_series_tune_grid.py
@@ -6,7 +6,7 @@
 
 from pycaret.time_series import TSForecastingExperiment
 
-from .time_series_test_utils import _return_model_names
+from time_series_test_utils import _return_model_names
 
 pytestmark = pytest.mark.filterwarnings("ignore::UserWarning")
 

diff --git a/pycaret/tests/test_time_series_tune_random.py b/pycaret/tests/test_time_series_tune_random.py
@@ -6,7 +6,7 @@
 
 from pycaret.time_series import TSForecastingExperiment
 
-from .time_series_test_utils import _return_model_names
+from time_series_test_utils import _return_model_names
 
 pytestmark = pytest.mark.filterwarnings("ignore::UserWarning")
 

diff --git a/pycaret/tests/test_time_series_utils_plots.py b/pycaret/tests/test_time_series_utils_plots.py
@@ -12,7 +12,7 @@
     _reformat_dataframes_for_plots,
 )
 
-from .time_series_test_utils import _ALL_PLOTS
+from time_series_test_utils import _ALL_PLOTS
 
 pytestmark = pytest.mark.filterwarnings("ignore::UserWarning")