Skip to content

Commit

Permalink
by minqi
Browse files Browse the repository at this point in the history
  • Loading branch information
Minqi824 committed Jun 1, 2023
1 parent 787490d commit 9d116a1
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 26 deletions.
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -85,4 +85,6 @@ run(suffix='', grid_mode='small', grid_size=1000, gan_specific=False, mode='end-
- 2023.05.13: provided ensembled topk components
- 2023.05.22: fixed the bug that modified the original data during the process of conducting the experiment.
- 2023.05.22: optimized code efficiency
- 2023.05.29: fixed the bug in REPEN model
- 2023.05.29: fixed the bug in REPEN model
- 2023.06.01: fixed the bug in data preprocessing of meta predictor (end-to-end mode)
- 2023.06.01: replaced LightGBM with XGBoost (which is faster) for the ML-based meta predictor
45 changes: 26 additions & 19 deletions metaclassifier/meta_dl.py
Original file line number Diff line number Diff line change
Expand Up @@ -334,8 +334,9 @@ def meta_fit_end2end(self, es=True, lr=1e-3):
# set seed for reproducible results
self.utils.set_seed(self.seed)

meta_data = []
for la in [5, 10, 20]:
meta_data = []; la_list = [5, 10, 20]
self.scaler_las = MinMaxScaler(clip=True).fit(np.array(la_list).reshape(-1, 1))
for la in la_list:
result = pd.read_csv('../result/result-' + self.metric + '-test-' + '-'.join(
[self.suffix, str(la), self.grid_mode, str(self.grid_size), str(self.seed)]) + '.csv')
result.rename(columns={'Unnamed: 0': 'Components'}, inplace=True)
Expand All @@ -362,10 +363,10 @@ def meta_fit_end2end(self, es=True, lr=1e-3):
meta_data_batch = []
for j in range(result.shape[0]):
if not pd.isnull(result.iloc[j, i]): # set nan to 0?
meta_data_batch.append({'X_train': data['X_train'],
meta_data_batch.append({'X_train': MinMaxScaler(clip=True).fit_transform(data['X_train']),
'y_train': data['y_train'],
'dataset_idx': i,
'la': la,
'la': self.scaler_las.transform(np.array([[la]])).item(),
'components': self.components_df_index.iloc[j, :].values,
'performance': result.iloc[j, i]})
if len(meta_data_batch) > 0:
Expand Down Expand Up @@ -413,9 +414,10 @@ def meta_predict_end2end(self, metric=None, top_k=5):
# notice that we can only use the training set of the testing task
preds = []; self.model.eval()
for i in range(self.components_df_index.shape[0]):
X_list_test = [torch.from_numpy(test_data['X_train']).float().to(self.device)]

X_list_test = [torch.from_numpy(MinMaxScaler(clip=True).fit_transform(test_data['X_train'])).float().to(self.device)]
y_list_test = [torch.from_numpy(test_data['y_train']).float().to(self.device)]
la_test = torch.tensor([[self.test_la]]).to(self.device)
la_test = torch.tensor([[self.scaler_las.transform(np.array([[self.test_la]])).item()]]).to(self.device)
components_test = torch.from_numpy(self.components_df_index.values[i, :].reshape(1, -1)).float().to(self.device)
with torch.no_grad():
_, _, pred = self.model(X_list_test, y_list_test, la_test, components_test)
Expand Down Expand Up @@ -487,27 +489,32 @@ def meta_predict_end2end(self, metric=None, top_k=5):

# demo for debugging
def run_demo():
run_meta = meta(seed=1,
run_meta = meta(seed=2,
metric='AUCPR',
suffix='formal',
grid_mode='small',
grid_size=1000,
test_dataset='44_Wilt')

clf = run_meta.meta_fit()
clf.test_la = 25
perf = clf.meta_predict()
print(perf)
loss_name='pearson',
ensemble=False,
test_dataset='40_vowels')

# clf = run_meta.meta_fit_end2end()
# clf.test_la = 10
# perf = clf.meta_predict_end2end()
# clf = run_meta.meta_fit()
# clf.test_la = 25
# perf = clf.meta_predict()
# print(perf)

clf = run_meta.meta_fit_end2end()
clf.test_la = 20
perf = clf.meta_predict_end2end()
print(perf)

# experiments for two-stage or end-to-end version of meta predictor
def run(suffix, grid_mode, grid_size, mode, loss_name=None, ensemble=False):
# run experiments for comparing proposed meta predictor and current SOTA methods
utils = Utils()
file_path = 'meta-' + grid_mode + '-' + str(grid_size)
if not os.path.exists('../result/' + file_path):
os.makedirs('../result/' + file_path)

for metric in ['AUCROC', 'AUCPR']:
# result of current SOTA models
Expand Down Expand Up @@ -603,9 +610,9 @@ def run(suffix, grid_mode, grid_size, mode, loss_name=None, ensemble=False):
result_SOTA['Meta'] = meta_classifier_performance

if mode == 'two-stage':
result_SOTA.to_csv('../result/' + metric + '-' + loss_name + '-' + str(ensemble) + '-meta-dl-twostage.csv', index=False)
result_SOTA.to_csv('../result/' + file_path + '/' + metric + '-' + loss_name + '-' + str(ensemble) + '-meta-dl-twostage.csv', index=False)
elif mode == 'end-to-end':
result_SOTA.to_csv('../result/' + metric + '-' + loss_name + '-' + str(ensemble) + '-meta-dl-end2end.csv', index=False)
result_SOTA.to_csv('../result/' + file_path + '/' + metric + '-' + loss_name + '-' + str(ensemble) + '-meta-dl-end2end.csv', index=False)
else:
raise NotImplementedError

Expand All @@ -619,4 +626,4 @@ def run(suffix, grid_mode, grid_size, mode, loss_name=None, ensemble=False):
# loss_name: ['pearson', 'ranknet', 'mse', 'weighted_mse']
# ensemble: bool
# mode: either 'two-stage' or 'end-to-end'
run(suffix='formal', grid_mode='large', grid_size=1000, loss_name='ranknet', ensemble=False, mode='end-to-end')
run(suffix='formal', grid_mode='small', grid_size=1000, loss_name='weighted_mse', ensemble=False, mode='two-stage')
11 changes: 5 additions & 6 deletions metaclassifier/meta_ml.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,7 @@

from data_generator import DataGenerator
from utils import Utils
import lightgbm as lgb
from catboost import CatBoostRegressor
import xgboost as xgb
from components import Components

class meta():
Expand Down Expand Up @@ -161,8 +160,8 @@ def meta_fit(self):

X = np.concatenate((meta_features, las, components), axis=1)

if self.model_name == 'LightGBM':
self.model = lgb.LGBMRegressor(random_state=self.seed).fit(X, performances)
if self.model_name == 'XGBoost':
self.model = xgb.XGBRegressor(random_state=self.seed).fit(X, performances)
elif self.model_name == 'CatBoost':
self.model = catboost.CatBoostRegressor(random_state=self.seed).fit(X, performances)
else:
Expand Down Expand Up @@ -350,6 +349,6 @@ def run(suffix, grid_mode, grid_size, model_name, ensemble):

# formal experiments
# grid_mode: ['small', 'large']
# model_name: ['LightGBM', 'CatBoost']
# model_name: ['XGBoost', 'CatBoost']
# ensemble: bool
run(suffix='formal', grid_mode='small', grid_size=1000, model_name='CatBoost', ensemble=True)
run(suffix='formal', grid_mode='small', grid_size=1000, model_name='XGBoost', ensemble=True)

0 comments on commit 9d116a1

Please sign in to comment.