# UI.py
import streamlit as st
import pandas as pd
import numpy as np
import torch

from LEURN import LEURN
from DATA import split_and_processing
from TRAINER import Trainer
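# Streamlit front end for LEURN: upload a tabular dataset, train a model, inspect
# per-feature explanations for individual rows, and generate synthetic samples.
# Launch with:
#   streamlit run UI.py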
# Streamlit application layout
st.title("LEURN")
# Initialize or reset session states if necessary
if 'init' not in st.session_state:
    st.session_state['training_completed'] = False
    st.session_state['data_chosen'] = False
    st.session_state['init'] = True
    st.session_state['selected_row'] = False
    st.session_state['explanation_made'] = False
    st.session_state['result'] = False
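# Streamlit re-runs this script from top to bottom on every widget interaction; these flags,
# together with the model, scores, and explanations stored in st.session_state below, keep
# each completed stage available across reruns instead of recomputing it.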
# Upload csv or excel
st.subheader("File Uploader")
uploaded_file = st.file_uploader("Upload your Excel/CSV file", type=["csv", "xlsx"])
if uploaded_file is not None:
    # Read the uploaded file
    df = pd.read_csv(uploaded_file) if uploaded_file.type == "text/csv" else pd.read_excel(uploaded_file)
    st.write("Data Preview:")
    st.write(df.head())
    st.subheader("Categorical Feature and Target Selection")
    # Select the target variable
    target = st.selectbox("Select the target variable", options=df.columns)
    # Define features and target
    X = df.drop(target, axis=1)
    y = df[target]
    attribute_names = X.columns
    # Select categorical variables
    st.write("Select categorical variables:")
    categoricals = [st.checkbox(f"{col} is categorical", key=col) for col in X.columns]
    # User input for model parameters
    st.subheader("Model Training Parameters")
    depth = st.selectbox("Select Model Depth", options=[1, 2, 3, 4, 5], index=2)
    batch_size = st.selectbox("Select Batch Size", options=[64, 128, 256, 512, 1024, 2048, 4096], index=4)
    lr = st.selectbox("Select Learning Rate", options=[1e-4, 5e-4, 1e-3, 5e-3, 1e-2], index=3)
    epochs = st.number_input("Enter Number of Epochs", min_value=1, max_value=1000, value=300)
    droprate = st.slider("Select Dropout Rate", min_value=0.0, max_value=1.0, value=0.0, step=0.05)
    output_type = st.radio("Select Output Type (0: regression, 1: binary classification, 2: multi-class classification)", options=[0, 1, 2], index=0)
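    # `categoricals` is a list of booleans aligned with X.columns; split_and_processing
    # (DATA.py) presumably uses it to decide which columns receive categorical encoding.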
if st.button("Train Neural Network"):
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
#Split and process
X_train, X_val, X_test, y_train, y_val, y_test, preprocessor = split_and_processing(X, y, categoricals, output_type, attribute_names)
#Initialize model
model = LEURN(preprocessor, depth=depth, droprate=droprate).to(device)
#Train model
model_trainer = Trainer(model, X_train, X_val, y_train, y_val, lr=lr, batch_size=batch_size, epochs=epochs, problem_type=output_type, verbose=False)
model_trainer.train()
#Load best model
model.load_state_dict(model_trainer.best_model)
#Get performances
perf_train = model_trainer.evaluate(X_train, y_train)
perf_val = model_trainer.evaluate(X_val, y_val)
perf_test = model_trainer.evaluate(X_test, y_test)
st.session_state['perf_train']=perf_train
st.session_state['perf_val']=perf_val
st.session_state['perf_test']=perf_test
#Save test dataset and model to explain/generate later
X_test_inverse = preprocessor.inverse_transform_X(X_test)
X_test_inverse.to_csv('test.csv',index=False)
st.session_state['training_completed'] = True
st.session_state['model'] = model # Adjusted for compatibility
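    # The scores are read back from st.session_state below so they stay visible on later
    # reruns, after the button's pressed state has been reset.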
    if st.session_state['training_completed']:
        # Print performances
        st.write("Here are the performances; try different hyperparameters if you are not satisfied.")
        if output_type == 0:
            st.subheader("Training Results (MSE)")
        elif output_type == 1:
            st.subheader("Training Results (ROC-AUC)")
        else:
            st.subheader("Training Results (ACC)")
        st.write(f"Training Score: {st.session_state['perf_train']:.4f}")
        st.write(f"Validation Score: {st.session_state['perf_val']:.4f}")
        st.write(f"Test Score: {st.session_state['perf_test']:.4f}")
        # File uploader for explanation
        st.subheader("Explain New Inputs")
        uploaded_file_to_explain = st.file_uploader(
            "Upload your Excel/CSV file to explain. The uploaded file should not contain the target variable.",
            type=["csv", "xlsx"])
        if uploaded_file_to_explain is not None:
            # Read the uploaded file
            X_test_inverse = pd.read_csv(uploaded_file_to_explain) if uploaded_file_to_explain.type == "text/csv" else pd.read_excel(uploaded_file_to_explain)
            # Save the DataFrame in session state
            st.session_state['X_test_inverse_df'] = X_test_inverse.to_json()
            st.session_state['data_chosen'] = True  # Flag to indicate data is chosen
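        # The DataFrame round-trips through JSON in session state so it survives reruns;
        # pandas to_json/read_json may not preserve every dtype exactly (e.g. datetimes),
        # which could matter when preprocessor.transform_X is applied later.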
        if st.session_state['data_chosen']:
            # Load the DataFrame from session state
            X_test_inverse = pd.read_json(st.session_state['X_test_inverse_df'])
            # Always display the DataFrame so it is visible for selection
            st.write("Test DataFrame:")
            st.write(X_test_inverse)
            # Let the user select a row; the selection updates session state dynamically
            selected_index = st.selectbox("Select a row:", options=X_test_inverse.index, key="selected_index")
            selected_row = X_test_inverse.loc[[selected_index]]
            st.write("Selected Data for Explanation:")
            st.write(selected_row)
            st.session_state['selected_row'] = selected_row
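            # The double brackets in .loc keep the selection as a one-row DataFrame rather
            # than a Series, which is presumably what preprocessor.transform_X expects below.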
            # Explain the selected row
            if st.button("Explain"):
                model = st.session_state['model']
                Exp_df_test_sample, result, result_original_format = model.explain(
                    torch.from_numpy(model.preprocessor.transform_X(st.session_state['selected_row']).values.astype('float32')),
                    include_causal_analysis=True)
                st.session_state['explanation_made'] = True
                st.session_state['Exp_df_test_sample'] = Exp_df_test_sample
                st.session_state['result_original_format'] = result_original_format
                st.session_state['result'] = result
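            # model.explain is assumed (from the variable names and usage here) to return a
            # per-feature contribution table plus the prediction in network space and in the
            # original target scale; see LEURN.py for the authoritative signature.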
            # Print explanations
            if st.session_state['explanation_made']:
                st.write("Explanation DataFrame:")
                st.write(st.session_state['Exp_df_test_sample'])
                st.write("Predicted Output (network format):")
                st.write(st.session_state['result'].detach().numpy().astype('str'))
                if output_type == 1:
                    if np.sign(st.session_state['result'].detach().numpy()) > 0:
                        st.write("The result is positive, so the predicted class below is the one represented by the positive sign. In the explanation DataFrame, positive contributions increase the likelihood of this class.")
                    else:
                        st.write("The result is negative, so the predicted class below is the one represented by the negative sign. In the explanation DataFrame, negative contributions increase the likelihood of this class.")
                st.write("Predicted Output (original format):")
                st.write(st.session_state['result_original_format'])
        # Data generation part
        st.subheader("Generate Data From Scratch")
        if st.button("Generate"):
            model = st.session_state['model']
            generated_sample_nn_friendly, generated_sample_original_input_format, output = model.generate()
            Exp_df_generated_sample, result, result_original_format = model.explain(
                generated_sample_nn_friendly, include_causal_analysis=True)
            st.write("Explanation DataFrame:")
            st.write(Exp_df_generated_sample)
            st.write("Predicted Output (network format):")
            st.write(result.detach().numpy().astype('str'))
            if output_type == 1:
                if np.sign(result.detach().numpy()) > 0:
                    st.write("The result is positive, so the predicted class below is the one represented by the positive sign. In the explanation DataFrame, positive contributions increase the likelihood of this class.")
                else:
                    st.write("The result is negative, so the predicted class below is the one represented by the negative sign. In the explanation DataFrame, negative contributions increase the likelihood of this class.")
            st.write("Predicted Output (original format):")
            st.write(result_original_format)
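            # The generated sample is explained through the same model.explain call as an
            # uploaded row, so the contribution table and sign convention above apply to
            # synthetic data as well.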