Skip to content

Commit

Permalink
small edits
Browse files Browse the repository at this point in the history
  • Loading branch information
aaronalt committed May 21, 2021
1 parent 6fda806 commit c89a3a6
Show file tree
Hide file tree
Showing 3 changed files with 3 additions and 182 deletions.
4 changes: 2 additions & 2 deletions database.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,8 +219,8 @@ def fit_model(self, *args, **kwargs):
if r_type == "poly.fit":
model = Model([], [], col)
rss_max = 1000
# Iterates through orders and returns fit with minimum residual error, with weight=1/y
# todo: experiment with different weights, programmatically (for paper, what to do next)
# Iterates through orders and returns fit with
# minimum residual error, with weight=1/y
for i in range(1, order + 1):
weight = 1 / self.df[col_name]
fn = P.fit(self.df['x'], self.df[col_name], i, full=True, w=weight)
Expand Down
180 changes: 1 addition & 179 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,189 +43,11 @@ def main():
- test: lower order vs. 1st best fit
- fn(y2): same as y1
- fn(y3): needs linear and not polynomial function to reduce rss
- test: new linear function vs. current best fit
- fn(y4): best order is 3, but can rss be reduced? still too large
'''






"""# Instantiate new Data objects
train, ideal, test = Data("training_data", to_db=True), Data("ideal_functions", to_db=True), Data("test_data")
'''Plot training dataset as subplots'''
train_graph = Graph("Training Data", df=train.csv_to_df())
train_graph.make_subplots(train_graph.title)
# Dictionary for different iterations' polynomial order
_n = {
'y1': [5, 21, 36],
'y2': [5, 22, 36],
'y4': [3, 9, 27]
}
'''
Fit models to training data, map ideal function and plot
'''
# 1st iteration
# Empty data objects to hold created models, processed data
ideal_funs_dict = {'x': train.df['x']}
train_master = train.csv_to_df()
# train.df_to_html(train_master)
models = {}
# y1
nl_p = train.fit_model(1, ideal, 'poly.fit', order=_n['y1'][0])
models['y1'] = nl_p
train_graph.plot_model(nl_p, plt_type='best fit', with_rmse=True)
ideal_funs_dict[nl_p.ideal_col] = nl_p.ideal_col_array
train_master['y1_if'] = nl_p.ideal_col
train_master['y1_max_err'] = nl_p.max_dev
train_master['y1_best_fit'] = nl_p
# y2
nl_7 = train.fit_model(2, ideal, 'poly.fit', order=_n['y2'][0])
models['y2'] = nl_7
train_graph.plot_model(nl_7, plt_type='best fit', with_rmse=True)
ideal_funs_dict[nl_7.ideal_col] = nl_7.ideal_col_array
train_master['y2_if'] = nl_7.ideal_col
train_master['y2_max_err'] = nl_7.max_dev
train_master['y2_best_fit'] = nl_7
# y3
lm_p = train.fit_model(3, ideal, 'poly.fit', 20, print_table=True)
models['y3'] = lm_p
train_graph.plot_model(lm_p, plt_type='best fit', with_rmse=True)
ideal_funs_dict[lm_p.ideal_col] = lm_p.ideal_col_array
train_master['y3_if'] = lm_p.ideal_col
train_master['y3_max_err'] = lm_p.max_dev
train_master['y3_best_fit'] = lm_p
# y4
cm_p = train.fit_model(4, ideal, 'poly.fit', order=_n['y4'][0])
models['y4'] = cm_p
train_graph.plot_model(cm_p, plt_type='best fit', with_rmse=True)
ideal_funs_dict[cm_p.ideal_col] = cm_p.ideal_col_array
train_master['y4_if'] = cm_p.ideal_col
train_master['y4_max_err'] = cm_p.max_dev
train_master['y4_best_fit'] = cm_p
train_master['x'] = [round(i, 2) for i in train_master['x']]
# Compare ideal functions to training data and choose best fit
ideal_funs_df = pd.DataFrame(data=ideal_funs_dict)
ideal_funs_df = ideal_funs_df.set_index('x')
test = Data("test_data")
test_df = test.csv_to_df()
test_model = Model(test_df['x'], test_df['y'], 1, df=test_df)
test_df_1, _tm1 = test_model.match_ideal_functions(ideal_funs_df, train_master, models)
# 2nd iteration
# New empty objects
ideal_funs_dict = {'x': train.df['x']}
train_master = train.csv_to_df()
models_2 = {}
# y1
nl_p2 = train.fit_model(1, ideal, 'poly.fit', order=_n['y1'][1])
models_2['y1'] = nl_p2
train_graph.plot_model(nl_p2, plt_type='best fit', with_rmse=True)
ideal_funs_dict[nl_p.ideal_col] = nl_p2.ideal_col_array
train_master['y1_if'] = nl_p2.ideal_col
train_master['y1_max_err'] = nl_p2.max_dev
train_master['y1_best_fit'] = nl_p2
# y2
nl_72 = train.fit_model(2, ideal, 'poly.fit', order=_n['y2'][1])
models_2['y2'] = nl_72
train_graph.plot_model(nl_72, plt_type='best fit', with_rmse=True)
ideal_funs_dict[nl_72.ideal_col] = nl_72.ideal_col_array
train_master['y2_if'] = nl_72.ideal_col
train_master['y2_max_err'] = nl_72.max_dev
train_master['y2_best_fit'] = nl_72
# y3
lm_p2 = train.fit_model(3, ideal, 'linear')
models_2['y3'] = lm_p2
train_graph.plot_model(lm_p2, plt_type='best fit', with_rmse=True)
ideal_funs_dict[lm_p2.ideal_col] = lm_p2.ideal_col_array
train_master['y3_if'] = lm_p2.ideal_col
train_master['y3_max_err'] = lm_p2.max_dev
train_master['y3_best_fit'] = lm_p2
# y4
cm_p2 = train.fit_model(4, ideal, 'poly.fit', order=_n['y4'][1])
models_2['y4'] = cm_p2
train_graph.plot_model(cm_p2, plt_type='best fit', with_rmse=True)
ideal_funs_dict[cm_p2.ideal_col] = cm_p2.ideal_col_array
train_master['y4_if'] = cm_p2.ideal_col
train_master['y4_max_err'] = cm_p2.max_dev
train_master['y4_best_fit'] = cm_p2
# Compare functions
ideal_funs_df = pd.DataFrame(data=ideal_funs_dict)
ideal_funs_df = ideal_funs_df.set_index('x')
test_model = Model(test_df['x'], test_df['y'], 1, df=test_df)
test_df_2, _tm2 = test_model.match_ideal_functions(ideal_funs_df, train_master, models_2)
# 3rd iteration
# New empty objects
ideal_funs_dict = {'x': train.df['x']}
train_master = train.csv_to_df()
models_3 = {}
# y1
nl_p3 = train.fit_model(1, ideal, 'poly.fit', order=_n['y1'][2], print_table=True)
models_3['y1'] = nl_p3
train_graph.plot_model(nl_p3, plt_type='best fit', with_rmse=True)
ideal_funs_dict[nl_p3.ideal_col] = nl_p3.ideal_col_array
train_master['y1_if'] = nl_p3.ideal_col
train_master['y1_max_err'] = nl_p3.max_dev
train_master['y1_best_fit'] = nl_p3
# y2
nl_73 = train.fit_model(2, ideal, 'poly.fit', order=_n['y2'][2], print_table=True)
models_3['y2'] = nl_73
train_graph.plot_model(nl_73, plt_type='best fit', with_rmse=True)
ideal_funs_dict[nl_73.ideal_col] = nl_73.ideal_col_array
train_master['y2_if'] = nl_73.ideal_col
train_master['y2_max_err'] = nl_73.max_dev
train_master['y2_best_fit'] = nl_73
# y3
lm_p3 = train.fit_model(3, ideal, 'linear')
models_3['y3'] = lm_p3
train_graph.plot_model(lm_p3, plt_type='best fit', with_rmse=True)
ideal_funs_dict[lm_p3.ideal_col] = lm_p3.ideal_col_array
train_master['y3_if'] = lm_p3.ideal_col
train_master['y3_max_err'] = lm_p3.max_dev
train_master['y3_best_fit'] = lm_p3
# y4
cm_p3 = train.fit_model(4, ideal, 'poly.fit', order=_n['y4'][2], print_table=True)
models_3['y4'] = cm_p3
train_graph.plot_model(cm_p3, plt_type='best fit', with_rmse=True)
ideal_funs_dict[cm_p3.ideal_col] = cm_p3.ideal_col_array
train_master['y4_if'] = cm_p3.ideal_col
train_master['y4_max_err'] = cm_p3.max_dev
train_master['y4_best_fit'] = cm_p3
# Compare functions
ideal_funs_df = pd.DataFrame(data=ideal_funs_dict)
ideal_funs_df = ideal_funs_df.set_index('x')
test_model = Model(test_df['x'], test_df['y'], 1, df=test_df)
print(f'ideal funs df: \n{ideal_funs_df}')
test_df_3, _tm3 = test_model.match_ideal_functions(ideal_funs_df, train_master, models_3)
# Plot comparisons between polynomial orders for each training function
train_graph.make_subplots('Model Comparison',
models={'m1': models, 'm2': models_2, 'm3': models_3})"""
# todo: experiment with different weights, programmatically (for paper, what to do next)


if __name__ == "__main__":
Expand Down
1 change: 0 additions & 1 deletion model.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
Data Science, M.Sc.
"""


from itertools import starmap
from sklearn.metrics import mean_squared_error, max_error
import pandas as pd
Expand Down

0 comments on commit c89a3a6

Please sign in to comment.