Merge branch 'master' into python_3p10

iykrichie · Aug 28, 2022 · 73c0b4d · 73c0b4d
2 parents 13fda9e + d021131
commit 73c0b4d
Show file tree

Hide file tree

Showing 104 changed files with 185,155 additions and 987 deletions.
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -23,11 +23,11 @@ jobs:
       - name: Run black
         uses: psf/black@stable
         with:
-          src: pycaret
+          src: pycaret tests
       - name: Check imports
         uses: jamescurtin/isort-action@master
         with:
-          sortPaths: pycaret
+          sortPaths: pycaret tests
 
   # JOBS MUST START WITH test !!!!
   test:
@@ -59,14 +59,14 @@ jobs:
           pip list
       - name: Remove tests
         run: |
-          rm pycaret/tests/test_classification_tuning.py
-          rm pycaret/tests/test_classification_plots.py
-          rm pycaret/tests/test_regression_plots.py
-          rm pycaret/tests/test_regression_tuning.py
-          rm pycaret/tests/test_time_series_tune_grid.py
-          rm pycaret/tests/test_time_series_tune_random.py
-          rm pycaret/tests/test_time_series_plots.py
-          rm pycaret/tests/test_time_series_utils_plots.py
+          rm tests/test_classification_tuning.py
+          rm tests/test_classification_plots.py
+          rm tests/test_regression_plots.py
+          rm tests/test_regression_tuning.py
+          rm tests/test_time_series_tune_grid.py
+          rm tests/test_time_series_tune_random.py
+          rm tests/test_time_series_plots.py
+          rm tests/test_time_series_utils_plots.py
       - name: Test with pytest
         run: pytest
 
@@ -99,7 +99,7 @@ jobs:
           pip list
       - name: Remove tests
         run: |
-          remove-item pycaret/tests/* -Include @('test_classification_tuning.py','test_classification_plots.py','test_regression_plots.py', 'test_regression_tuning.py', 'test_time_series_tune_grid.py', 'test_time_series_tune_random.py', 'test_create_api.py', 'test_create_docker.py', 'test_drift_report.py', 'test_eda.py', 'test_time_series_plots.py', 'test_time_series_utils_plots.py')
+          remove-item tests/* -Include @('test_clustering.py', 'test_classification_tuning.py','test_classification_plots.py','test_regression_plots.py', 'test_regression_tuning.py', 'test_time_series_tune_grid.py', 'test_time_series_tune_random.py', 'test_create_api.py', 'test_create_docker.py', 'test_drift_report.py', 'test_eda.py', 'test_time_series_plots.py', 'test_time_series_utils_plots.py')
       - name: Test with pytest
         run: pytest
 
@@ -123,7 +123,7 @@ jobs:
   #       python -m spacy download en
   #   - name: Remove tests
   #     run: |
-  #       find pycaret/tests -type f -not -name '__init__.py' -not -name 'test_classification_tuning.py' -not -name 'test_regression_tuning.py' -delete
+  #       find tests -type f -not -name '__init__.py' -not -name 'test_classification_tuning.py' -not -name 'test_regression_tuning.py' -delete
   #   - name: Test with pytest
   #     run: pytest
 
@@ -146,7 +146,7 @@ jobs:
   #       python -m spacy download en
   #   - name: Remove tests
   #     run: |
-  #       remove-item pycaret/tests/* -Exclude @('__init__.py','test_classification_tuning.py')
+  #       remove-item tests/* -Exclude @('__init__.py','test_classification_tuning.py')
   #   - name: Test with pytest
   #     run: pytest
 
@@ -169,7 +169,7 @@ jobs:
   #       python -m spacy download en
   #   - name: Remove tests
   #     run: |
-  #       remove-item pycaret/tests/* -Exclude @('__init__.py','test_regression_tuning.py')
+  #       remove-item tests/* -Exclude @('__init__.py','test_regression_tuning.py')
   #   - name: Test with pytest
   #     run: pytest
 
@@ -197,7 +197,7 @@ jobs:
           pip list
       - name: Remove tests
         run: |
-          find pycaret/tests -type f -not -name '__init__.py' -not -name 'test_classification_plots.py' -not -name 'test_regression_plots.py' -not -name 'conftest.py' -not -name 'time_series_test_utils.py' -not -name 'test_time_series_plots.py' -not -name 'test_time_series_utils_plots.py' -delete
+          find tests -type f -not -name '__init__.py' -not -name 'test_classification_plots.py' -not -name 'test_regression_plots.py' -not -name 'conftest.py' -not -name 'time_series_test_utils.py' -not -name 'test_time_series_plots.py' -not -name 'test_time_series_utils_plots.py' -delete
       - name: Test with pytest
         run: pytest
 
@@ -231,7 +231,7 @@ jobs:
           pip list
       - name: Remove tests
         run: |
-          find pycaret/tests -type f -not -name '__init__.py' -not -name 'conftest.py' -not -name 'time_series_test_utils.py' -not -name 'test_time_series_tune_random.py' -delete
+          find tests -type f -not -name '__init__.py' -not -name 'conftest.py' -not -name 'time_series_test_utils.py' -not -name 'test_time_series_tune_random.py' -delete
       - name: Test with pytest
         run: pytest
 
@@ -265,6 +265,6 @@ jobs:
           pip list
       - name: Remove tests
         run: |
-          find pycaret/tests -type f -not -name '__init__.py' -not -name 'conftest.py' -not -name 'time_series_test_utils.py' -not -name 'test_time_series_tune_grid.py' -delete
+          find tests -type f -not -name '__init__.py' -not -name 'conftest.py' -not -name 'time_series_test_utils.py' -not -name 'test_time_series_tune_grid.py' -delete
       - name: Test with pytest
         run: pytest
diff --git a/.gitignore b/.gitignore
@@ -2,14 +2,14 @@ venv/
 .ipynb_checkpoints/
 .pytest_cache/
 __pycache__/
-catboost_info
+mlruns/
+catboost_info/
 /stacker.py
 /stacknet.py
 /cli.py
 /app.py
 /cli_app.py
 /logo.png
-/mlruns
 /model
 /Results.html
 /build
@@ -21,13 +21,9 @@ catboost_info
 /logs.log
 examples/logs.log
 .log
-pycaret/__pycache__/
-pycaret/tests/__pycache__/
 tutorials/logs.log
 /docs/build
 .idea/
-mlruns/
-/pycaret/tests/mlruns/
 *.log
 trained_models/
 *.pkl
@@ -38,5 +34,4 @@ demo4.py
 dask*/
 /.venv
 /.devcontainer
-tmp
-tmp/*
+tmp/
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -19,7 +19,7 @@ If you are interested or have already written Medium story covering `PyCaret`. Y
 If you are willing to make major contribution you can always look out for the active sprint under `Projects` and discuss the proposal with sprint leader. Current active sprint is `2.2 - major refactoring`. This sprint is led by `Yard1`.
 
 ## What we currently need help on?
-- Improving unit-test cases https://github.com/pycaret/pycaret/tree/master/pycaret/tests
+- Improving unit-test cases https://github.com/pycaret/pycaret/tree/master/tests
 - Major refactoring in `preprocess.py` to accommodate distributed processing
 - Example Notebooks required. Send PR to https://github.com/pycaret/pycaret/tree/master/examples
 

diff --git a/MANIFEST.in b/MANIFEST.in
@@ -1 +1,2 @@
 include README.md LICENSE requirements.txt requirements-optional.txt requirements-test.txt
+recursive-include tests *.py
diff --git a/docs/source/contribute.rst b/docs/source/contribute.rst
@@ -49,7 +49,7 @@ sprint leader.
 What we currently need help on?
 -------------------------------
 -  Improving unit-test cases and test coverage
-   https://github.com/pycaret/pycaret/tree/master/pycaret/tests
+   https://github.com/pycaret/pycaret/tree/master/tests
 -  Refactor preprocessing pipeline to support GPU
 -  Dask Integration
 

diff --git a/pycaret/anomaly/functional.py b/pycaret/anomaly/functional.py
@@ -5,8 +5,9 @@
 from joblib.memory import Memory
 
 from pycaret.anomaly.oop import AnomalyExperiment
-from pycaret.internal.utils import DATAFRAME_LIKE, check_if_global_is_not_none
 from pycaret.loggers.base_logger import BaseLogger
+from pycaret.utils.constants import DATAFRAME_LIKE, SEQUENCE_LIKE
+from pycaret.utils.generic import check_if_global_is_not_none
 
 _EXPERIMENT_CLASS = AnomalyExperiment
 _CURRENT_EXPERIMENT: Optional[AnomalyExperiment] = None
@@ -20,6 +21,7 @@
 
 def setup(
     data: DATAFRAME_LIKE,
+    index: Union[bool, int, str, SEQUENCE_LIKE] = False,
     ordinal_features: Optional[Dict[str, list]] = None,
     numeric_features: Optional[List[str]] = None,
     categorical_features: Optional[List[str]] = None,
@@ -95,6 +97,15 @@ def setup(
         names.
 
 
+    index: bool, int, str or sequence, default = False
+        Handle indices in the `data` dataframe.
+            - If False: Reset to RangeIndex.
+            - If True: Keep the provided index.
+            - If int: Position of the column to use as index.
+            - If str: Name of the column to use as index.
+            - If sequence: Array with shape=(n_samples,) to use as index.
+
+
     ordinal_features: dict, default = None
         Categorical features to be encoded ordinally. For example, a categorical
         feature with 'low', 'medium', 'high' values where low < medium < high can
@@ -144,7 +155,7 @@ def setup(
         when preprocess is set to False.
 
 
-    create_date_columns: list of str, default=["day", "month", "year"]
+    create_date_columns: list of str, default = ["day", "month", "year"]
         Columns to create from the date features. Note that created features
         with zero variance (e.g. the feature hour in a column that only contains
         dates) are ignored. Allowed values are datetime attributes from
@@ -423,6 +434,7 @@ def setup(
     set_current_experiment(exp)
     return exp.setup(
         data=data,
+        index=index,
         ordinal_features=ordinal_features,
         numeric_features=numeric_features,
         categorical_features=categorical_features,

diff --git a/pycaret/anomaly/oop.py b/pycaret/anomaly/oop.py
@@ -12,7 +12,7 @@
 from pycaret.internal.pycaret_experiment.unsupervised_experiment import (
     _UnsupervisedExperiment,
 )
-from pycaret.internal.pycaret_experiment.utils import MLUsecase
+from pycaret.utils.generic import MLUsecase
 
 LOGGER = get_logger()
 

diff --git a/pycaret/classification/functional.py b/pycaret/classification/functional.py
@@ -6,12 +6,9 @@
 
 from pycaret.classification.oop import ClassificationExperiment
 from pycaret.internal.parallel.parallel_backend import ParallelBackend
-from pycaret.internal.utils import (
-    DATAFRAME_LIKE,
-    TARGET_LIKE,
-    check_if_global_is_not_none,
-)
 from pycaret.loggers.base_logger import BaseLogger
+from pycaret.utils.constants import DATAFRAME_LIKE, SEQUENCE_LIKE, TARGET_LIKE
+from pycaret.utils.generic import check_if_global_is_not_none
 
 _EXPERIMENT_CLASS = ClassificationExperiment
 _CURRENT_EXPERIMENT: Optional[ClassificationExperiment] = None
@@ -27,6 +24,7 @@ def setup(
     data: Optional[DATAFRAME_LIKE] = None,
     data_func: Optional[Callable[[], DATAFRAME_LIKE]] = None,
     target: TARGET_LIKE = -1,
+    index: Union[bool, int, str, SEQUENCE_LIKE] = False,
     train_size: float = 0.7,
     test_data: Optional[DATAFRAME_LIKE] = None,
     ordinal_features: Optional[Dict[str, list]] = None,
@@ -61,7 +59,7 @@ def setup(
     outliers_method: str = "iforest",
     outliers_threshold: float = 0.05,
     fix_imbalance: bool = False,
-    fix_imbalance_method: Optional[Any] = None,
+    fix_imbalance_method: Union[str, Any] = "SMOTE",
     transformation: bool = False,
     transformation_method: str = "yeo-johnson",
     normalize: bool = False,
@@ -134,15 +132,23 @@ def setup(
         multiclass.
 
 
+    index: bool, int, str or sequence, default = False
+        Handle indices in the `data` dataframe.
+            - If False: Reset to RangeIndex.
+            - If True: Keep the provided index.
+            - If int: Position of the column to use as index.
+            - If str: Name of the column to use as index.
+            - If sequence: Array with shape=(n_samples,) to use as index.
+
+
     train_size: float, default = 0.7
         Proportion of the dataset to be used for training and validation. Should be
         between 0.0 and 1.0.
 
 
     test_data: dataframe-like or None, default = None
         If not None, test_data is used as a hold-out set and `train_size` parameter
-        is ignored. The columns of data and test_data must match. If it's a pandas
-        dataframe, the indices must match as well.
+        is ignored. The columns of data and test_data must match.
 
 
     ordinal_features: dict, default = None
@@ -194,7 +200,7 @@ def setup(
         when preprocess is set to False.
 
 
-    create_date_columns: list of str, default=["day", "month", "year"]
+    create_date_columns: list of str, default = ["day", "month", "year"]
         Columns to create from the date features. Note that created features
         with zero variance (e.g. the feature hour in a column that only contains
         dates) are ignored. Allowed values are datetime attributes from
@@ -349,10 +355,10 @@ def setup(
         Technique) is applied by default to create synthetic datapoints for minority class.
 
 
-    fix_imbalance_method: imblearn estimator, default = None
-        When ``fix_imbalance`` is True, `imblearn` compatible estimator with a
-        `fit_resample` method can be passed. If None, `imblearn.over_sampling.SMOTE`
-        is used.
+    fix_imbalance_method: str or imblearn estimator, default = "SMOTE"
+        Estimator with which to perform class balancing. Choose from the name
+        of an `imblearn` estimator, or a custom instance of such. Ignored when
+        `fix_imbalance=False`.
 
 
     transformation: bool, default = False
@@ -591,6 +597,7 @@ def setup(
         data=data,
         data_func=data_func,
         target=target,
+        index=index,
         train_size=train_size,
         test_data=test_data,
         ordinal_features=ordinal_features,
Original file line number	Diff line number	Diff line change
		@@ -1 +1,2 @@
		include README.md LICENSE requirements.txt requirements-optional.txt requirements-test.txt
		recursive-include tests *.py