[pull] main from automl:main #18

Merged: 4 commits merged into main from automl:main on Nov 7, 2022
Changes from 1 commit
Add GPU support for catboost and xgb
noahho authored Nov 7, 2022
commit 30c47c08dd196905696cdb8a952db86431d9ebec

tabpfn/scripts/tabular_baselines.py: 19 changes (15 additions, 4 deletions)
@@ -1241,11 +1241,15 @@ def catboost_metric(x, y, test_x, test_y, cat_features, metric_used, max_time=300, no_tune=None, gpu_id=None):
                                              , one_hot=False
                                              , cat_features=cat_features
                                              , impute=False
-                                             , standardize=False)
+                                             , standardize=False
+                                             , gpu_id=None)
 
     # NaNs in categorical features must be encoded as a separate class
     x[:, cat_features], test_x[:, cat_features] = np.nan_to_num(x[:, cat_features], nan=-1), np.nan_to_num(
         test_x[:, cat_features], nan=-1)
 
+    # Fall back to an empty dict so that **gpu_params below is a no-op on CPU
+    gpu_params = {'task_type': 'GPU', 'devices': gpu_id} if gpu_id is not None else {}
 
     def make_pd_from_np(x):
         data = pd.DataFrame(x)

@@ -1265,6 +1269,7 @@ def clf_(**params):
                 random_seed=int(y[:].sum()),
                 logging_level='Silent',
                 cat_features=cat_features,
+                **gpu_params,
                 **params)
         else:
             return CatBoostRegressor(

@@ -1274,6 +1279,7 @@ def clf_(**params):
                 random_seed=int(y[:].sum()),
                 logging_level='Silent',
                 cat_features=cat_features,
+                **gpu_params,
                 **params)

     return eval_complete_f(x, y, test_x, test_y, 'catboost', clf_, metric_used, max_time, no_tune)
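
The pattern the catboost hunks introduce is easier to read outside the diff: build the GPU kwargs once, then splat them into the estimator constructor. Below is a minimal standalone sketch of that idea; the fit_catboost helper and the toy data are illustrative, not part of tabular_baselines.py:

# Hypothetical helper sketching the optional-GPU pattern from the hunks above.
import numpy as np
from catboost import CatBoostClassifier

def fit_catboost(x, y, cat_features, gpu_id=None):
    # Empty dict on CPU, so **gpu_params adds nothing to the constructor call.
    # CatBoost expects `devices` as a string such as '0' or '0:1'.
    gpu_params = {'task_type': 'GPU', 'devices': str(gpu_id)} if gpu_id is not None else {}
    clf = CatBoostClassifier(iterations=100,
                             logging_level='Silent',
                             cat_features=cat_features,
                             **gpu_params)
    clf.fit(x, y)
    return clf

x = np.random.rand(200, 5)
y = (x[:, 0] > 0.5).astype(int)
clf = fit_catboost(x, y, cat_features=[])  # pass gpu_id=0 to train on the GPU
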
@@ -1294,11 +1300,14 @@ def clf_(**params):
     'n_estimators': hp.randint('n_estimators', 100, 4000), # This is smaller than in the paper
 }
 
-def xgb_metric(x, y, test_x, test_y, cat_features, metric_used, max_time=300, no_tune=None):
+def xgb_metric(x, y, test_x, test_y, cat_features, metric_used, max_time=300, no_tune=None, gpu_id=None):
     import xgboost as xgb
     # XGB documentation:
     # XGB handles categorical data without one-hot encoding; categorical feature support is experimental
     # XGB handles missing values without imputation
 
+    # Fall back to an empty dict so that **gpu_params below is a no-op on CPU
+    gpu_params = {'tree_method': 'gpu_hist', 'gpu_id': gpu_id} if gpu_id is not None else {}
 
     x, y, test_x, test_y = preprocess_impute(x, y, test_x, test_y
                                              , one_hot=False

@@ -1309,14 +1318,16 @@ def xgb_metric(x, y, test_x, test_y, cat_features, metric_used, max_time=300, no_tune=None):
     def clf_(**params):
         if is_classification(metric_used):
             return xgb.XGBClassifier(use_label_encoder=False
-                                     , nthread=1
+                                     , nthread=MULTITHREAD
                                      , **params
+                                     , **gpu_params
                                      , eval_metric=get_scoring_string(metric_used, usage='xgb') # AUC not implemented
                                      )
         else:
             return xgb.XGBRegressor(use_label_encoder=False
-                                    , nthread=1
+                                    , nthread=MULTITHREAD
                                     , **params
+                                    , **gpu_params
                                     , eval_metric=get_scoring_string(metric_used, usage='xgb') # AUC not implemented
                                     )

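The XGBoost side follows the same shape. A minimal standalone sketch, assuming a pre-2.0 xgboost release, where tree_method='gpu_hist' and gpu_id select the CUDA device (xgboost >= 2.0 replaced these with a single device parameter); make_xgb is a hypothetical name, not a function in tabular_baselines.py:

# Hypothetical helper sketching the optional-GPU pattern from the xgb hunks.
import xgboost as xgb

def make_xgb(gpu_id=None, **params):
    # With gpu_id=None the empty dict leaves XGBoost's CPU defaults untouched.
    gpu_params = {'tree_method': 'gpu_hist', 'gpu_id': gpu_id} if gpu_id is not None else {}
    return xgb.XGBClassifier(nthread=1, **gpu_params, **params)

clf = make_xgb(n_estimators=100)              # CPU
# clf = make_xgb(gpu_id=0, n_estimators=100)  # first CUDA device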