Merge branch 'master' into fix_create_api
tvdboom committed Aug 30, 2022
2 parents aaf657a + 60869f7 commit e2c8b95
Showing 8 changed files with 42 additions and 36 deletions.
2 changes: 1 addition & 1 deletion pycaret/classification/oop.py
@@ -923,7 +923,7 @@ def setup(
         if imputation_type == "simple":
             container.append(["Numeric imputation", numeric_imputation])
             container.append(["Categorical imputation", categorical_imputation])
-        else:
+        elif imputation_type == "iterative":
             if isinstance(numeric_iterative_imputer, str):
                 num_imputer = numeric_iterative_imputer
             else:
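
With the `elif`, the iterative-imputer settings are only added to the setup summary when iterative imputation is actually requested, so `imputation_type=None` no longer falls into the old bare `else:` branch. A minimal sketch (the "juice" demo dataset and `session_id` are only for illustration; assumes `imputation_type=None` is accepted to disable imputation in this version):

```python
import pycaret.classification as pcc
from pycaret.datasets import get_data

data = get_data("juice")

# Sketch only: with imputation_type=None the printed setup summary should skip the
# imputation rows instead of walking the iterative-imputer logging branch.
exp = pcc.setup(data, imputation_type=None, session_id=123)
```
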
2 changes: 1 addition & 1 deletion pycaret/internal/preprocess/preprocessor.py
@@ -615,7 +615,7 @@ def _encoding(self, max_encoding_ohe, encoding_method, rare_to_value, rare_value
 
         # Select columns for different encoding types
         one_hot_cols, rest_cols = [], []
-        for name, column in X_transformed.items():
+        for name, column in X_transformed[self._fxs["Categorical"]].items():
             n_unique = column.nunique()
             if n_unique == 2:
                 self._fxs["Ordinal"][name] = list(sorted(column.dropna().unique()))
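
Restricting the loop to `X_transformed[self._fxs["Categorical"]]` means encoder selection only inspects the declared categorical features. A hedged sketch (column names invented; `_fxs` is a pycaret-internal attribute read here purely for demonstration, and exact type inference may differ by version):

```python
import pandas as pd
import pycaret.classification as pcc

df = pd.DataFrame(
    {
        "flag": [0, 1] * 50,                            # numeric, only two unique values
        "color": ["red", "blue", "green", "red"] * 25,  # genuinely categorical
        "target": [0, 1, 1, 0] * 25,
    }
)
exp = pcc.setup(df, target="target", session_id=123)

# After the fix, a binary numeric column such as "flag" is no longer expected to be
# registered as an ordinal feature to encode (assuming it stays typed as numeric).
print(exp._fxs["Ordinal"])
```
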
6 changes: 4 additions & 2 deletions pycaret/internal/preprocess/transformers.py
@@ -117,7 +117,7 @@ def _reorder_cols(self, df, original_df):
         for col in df:
             if col in original_df and col not in self._include:
                 raise ValueError(
-                    f"Column '{col}' returned by the transformer "
+                    f"Column '{col}' returned by transformer {self.transformer} "
                     "already exists in the original dataset."
                 )
 
@@ -387,7 +387,9 @@ def fit(self, X, y=None):
 
     def transform(self, X, y=None):
         if not self.group_names:
-            self.group_names = [f"group_{i}" for i in range(len(self.group_features))]
+            self.group_names = [
+                f"group_{i}" for i in range(1, len(self.group_features) + 1)
+            ]
 
         for name, group in zip(self.group_names, self.group_features):
            # Drop columns that are not in the dataframe (can be excluded)
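
The renaming itself is easy to see in isolation (plain Python, no pycaret API involved): auto-generated group names now count from 1 instead of 0.

```python
# Before/after comparison of the auto-naming expression for three feature groups.
group_features = [["a1", "a2"], ["b1", "b2"], ["c1", "c2"]]

old_names = [f"group_{i}" for i in range(len(group_features))]
new_names = [f"group_{i}" for i in range(1, len(group_features) + 1)]

print(old_names)  # ['group_0', 'group_1', 'group_2']
print(new_names)  # ['group_1', 'group_2', 'group_3']
```
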
60 changes: 31 additions & 29 deletions pycaret/internal/pycaret_experiment/supervised_experiment.py
@@ -4840,11 +4840,13 @@ def predict_model(
 
         def replace_labels_in_column(pipeline, labels: pd.Series) -> pd.Series:
             # Check if there is a LabelEncoder in the pipeline
-            name = labels.name
-            index = labels.index
             le = get_label_encoder(pipeline)
             if le:
-                return pd.Series(le.inverse_transform(labels), name=name, index=index)
+                return pd.Series(
+                    data=le.inverse_transform(labels),
+                    name=labels.name,
+                    index=labels.index,
+                )
             else:
                 return labels
 
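
The refactored helper is behaviourally equivalent to the old one-liner. A standalone sketch with plain scikit-learn (class labels, Series name, and index values are made up) of what it does: map encoded predictions back to the original labels while preserving the Series name and index.

```python
import pandas as pd
from sklearn.preprocessing import LabelEncoder

le = LabelEncoder().fit(["CH", "MM"])
labels = pd.Series([0, 1, 1, 0], name="prediction_label", index=[10, 11, 12, 13])

decoded = pd.Series(
    data=le.inverse_transform(labels),
    name=labels.name,
    index=labels.index,
)
print(decoded)  # CH/MM values with the same name and index as the encoded input
```
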
@@ -5039,27 +5041,31 @@ def replace_labels_in_column(pipeline, labels: pd.Series) -> pd.Series:
 
         if score is not None:
             pred = pred.astype(int)
+
             if not raw_score:
-                score = [s[pred[i]] for i, s in enumerate(score)]
-            try:
-                score = pd.DataFrame(score, index=X_test_.index)
-                if raw_score:
-                    score_columns = pd.Series(
-                        range(score.shape[1]), index=X_test_.index
-                    )
-                    if not encoded_labels:
-                        score_columns = replace_labels_in_column(
-                            pipeline, score_columns
-                        )
-                    score.columns = [f"{SCORE_COLUMN}_{l}" for l in score_columns]
+                score = pd.DataFrame(
+                    data=[s[pred[i]] for i, s in enumerate(score)],
+                    index=X_test_.index,
+                    columns=[SCORE_COLUMN],
+                )
+            else:
+                if not encoded_labels:
+                    le = get_label_encoder(pipeline)
+                    if le:
+                        columns = le.classes_
+                    else:
+                        columns = range(score.shape[1])
                 else:
-                    score.columns = [SCORE_COLUMN]
-                score = score.round(round)
-                old_index = X_test_.index
-                X_test_ = pd.concat((X_test_, score), axis=1)
-                X_test_.index = old_index
-            except:
-                pass
+                    columns = range(score.shape[1])
+
+                score = pd.DataFrame(
+                    data=score,
+                    index=X_test_.index,
+                    columns=[f"{SCORE_COLUMN}_{l}" for l in columns],
+                )
+
+            score = score.round(round)
+            X_test_ = pd.concat((X_test_, score), axis=1)
 
         # store predictions on hold-out in display_container
         if df_score is not None:
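
For callers, the main visible effect is in `predict_model(..., raw_score=True)`: the per-class probability columns are now labelled with the decoded class names taken from the pipeline's LabelEncoder, and the silent `try/except: pass` around the concat is gone. A hedged end-to-end sketch (dataset, model, and column names are illustrative; assumes `SCORE_COLUMN` is "prediction_score"):

```python
import pycaret.classification as pcc
from pycaret.datasets import get_data

data = get_data("juice")
exp = pcc.setup(data, session_id=123)
model = pcc.create_model("lr")

preds = pcc.predict_model(model, raw_score=True)
# Expected to contain columns like "prediction_score_CH" / "prediction_score_MM"
# (the juice target classes) rather than "_0" / "_1".
print([c for c in preds.columns if c.startswith("prediction_score")])
```
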
@@ -5431,20 +5437,16 @@ def create_app(self, estimator, app_kwargs: Optional[dict]):
         all_inputs = []
         app_kwargs = app_kwargs or {}
 
-        data_without_target = self.X[list(self.X_train_transformed.columns)]
-
-        for i in data_without_target.columns:
+        for i in self.X.columns:
             if i in self._fxs["Categorical"] or i in self._fxs["Ordinal"]:
-                all_inputs.append(
-                    gr.inputs.Dropdown(list(data_without_target[i].unique()), label=i)
-                )
+                all_inputs.append(gr.inputs.Dropdown(list(self.X[i].unique()), label=i))
             else:
                 all_inputs.append(gr.inputs.Textbox(label=i))
 
         def predict(*dict_input):
 
             input_df = pd.DataFrame.from_dict([dict_input])
-            input_df.columns = list(data_without_target.columns)
+            input_df.columns = list(self.X.columns)
             return (
                 self.predict_model(
                     estimator, data=input_df, **self._create_app_predict_kwargs
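
`create_app` wraps the fitted pipeline in a small gradio interface; after this change the input widgets are built from `self.X` (the raw feature columns) rather than from the transformed training columns. A minimal usage sketch (requires the optional gradio dependency; dataset and model are illustrative):

```python
import pycaret.classification as pcc
from pycaret.datasets import get_data

data = get_data("juice")
exp = pcc.setup(data, session_id=123)
model = pcc.create_model("lr")

# Launches a gradio app with one Dropdown (categorical/ordinal) or Textbox (numeric)
# input per raw feature column.
pcc.create_app(model)
```
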
2 changes: 1 addition & 1 deletion pycaret/regression/oop.py
@@ -882,7 +882,7 @@ def setup(
         if imputation_type == "simple":
             container.append(["Numeric imputation", numeric_imputation])
             container.append(["Categorical imputation", categorical_imputation])
-        else:
+        elif imputation_type == "iterative":
             if isinstance(numeric_iterative_imputer, str):
                 num_imputer = numeric_iterative_imputer
             else:
2 changes: 2 additions & 0 deletions requirements-optional.txt
@@ -6,6 +6,7 @@ pandas-profiling>=3.1.0
 explainerdashboard>=0.3.8 # For dashboard method
 autoviz>=0.1.36 # For EDA method
 fairlearn>=0.7.0 # For check_fairness method
+# deepchecks>=0.8.2 # For deep_check method TODO: Add when compatible with plotly
 
 # Models
 xgboost>=1.1.0
@@ -26,6 +27,7 @@ scikit-optimize>=0.9.0
 # MLOps
 mlflow>=1.24.0
 gradio>=2.8.10
+boto3>=1.24.56 # For deploy_model method
 fastapi>=0.75.0 # For web api
 uvicorn>=0.17.6 # For web api
 m2cgen>=0.9.0 # For model conversion
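
boto3 is pulled in for `deploy_model`, which pushes a trained model to cloud storage. A hedged sketch of the documented AWS flow (bucket name and model name are placeholders; AWS credentials must be configured separately for boto3):

```python
import pycaret.classification as pcc
from pycaret.datasets import get_data

data = get_data("juice")
exp = pcc.setup(data, session_id=123)
model = pcc.create_model("lr")

# Uploads the trained pipeline to the given S3 bucket (placeholder name).
pcc.deploy_model(
    model,
    model_name="juice-lr-demo",
    platform="aws",
    authentication={"bucket": "my-example-bucket"},
)
```
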
2 changes: 1 addition & 1 deletion requirements.txt
@@ -16,7 +16,7 @@ numba~=0.55.0
 requests>=2.27.1 # Required by pycaret.datasets
 psutil>=5.9.0
 markupsafe>=2.0.1 # Fixes Google Colab issue
-importlib_metadata
+importlib_metadata>=4.12.0
 
 # Plotting
 matplotlib>=3.3.0
2 changes: 1 addition & 1 deletion tests/test_preprocess.py
@@ -241,7 +241,7 @@ def test_encoding_grouping_rare_categories():
     data = pycaret.datasets.get_data("juice")
     pc = pycaret.classification.setup(data, rare_to_value=0.5)
     X, _ = pc.pipeline.transform(pc.X, pc.y)
-    assert "rare" in pc.pipeline.steps[-4][1].transformer.mapping[0]["mapping"]
+    assert "rare" in pc.pipeline.steps[-2][1].transformer.mapping[0]["mapping"]
 
 
 def test_encoding_categorical_features():
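
The assertion now points at `steps[-2]`, presumably because the pipeline's step layout changed on master. A hedged sketch of a less position-dependent way to inspect the fitted pipeline when writing such tests (step names are pycaret-internal and may vary between versions):

```python
import pycaret.classification
import pycaret.datasets

data = pycaret.datasets.get_data("juice")
pc = pycaret.classification.setup(data, rare_to_value=0.5)

# List the (name, transformer) pairs instead of relying on a hard-coded index.
for name, step in pc.pipeline.steps:
    print(name, type(step).__name__)
```
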
