Commit: update readme
VivekPa committed Jul 1, 2019
1 parent 742ba3e commit 8fa83f8

Showing 3 changed files with 57 additions and 37 deletions.
26 changes: 22 additions & 4 deletions README.md
@@ -95,15 +95,33 @@ For the random forest classification model, the results were better. I used tick…
The base case simply predicts no moves in the market. The out-of-sample results were:

```bash
-Model log loss: 0.17
-Base log loss: 0.47
+Tick bars:
+Model log loss: 2.78
+Base log loss: 4.81
+
+Volume bars:
+Model log loss: 1.69
+Base log loss: 5.06
+
+Dollar bars:
+Model log loss: 2.56
+Base log loss: 2.94
```

To gauge how much of an impact the autoencoders made, I also ran the model without them; the results were:

```bash
-Model log loss: 0.55
-Base log loss: 0.47
+Tick bars:
+Model log loss: 5.12
+Base log loss: 4.81
+
+Volume bars:
+Model log loss: 3.25
+Base log loss: 5.06
+
+Dollar bars:
+Model log loss: 3.62
+Base log loss: 2.94
```
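
For readers who want to reproduce this kind of comparison, here is a minimal sketch using `sklearn.metrics.log_loss`. It assumes a binary move / no-move labelling (in line with the `binary_y=True` flag in `run.py`); the labels, the model probabilities, and the fixed 5% move probability used for the baseline are illustrative assumptions, not values from this repository.

```python
import numpy as np
from sklearn.metrics import log_loss

# Illustrative labels only -- assumed binary encoding: 0 = no move, 1 = move.
y_true = np.array([0, 0, 1, 0, 1, 0, 0, 1])

# Hypothetical model output: P(move) per bar, e.g. from predict_proba(...)[:, 1].
model_p_move = np.array([0.10, 0.20, 0.70, 0.15, 0.60, 0.30, 0.25, 0.80])

# Baseline "no moves" predictor: the same small, fixed P(move) for every bar.
base_p_move = np.full_like(model_p_move, 0.05)

print(f"Model log loss: {log_loss(y_true, model_p_move):.2f}")
print(f"Base log loss: {log_loss(y_true, base_p_move):.2f}")
```

With a constant baseline like this, the base log loss depends only on how often moves actually occur, which is why it differs across tick, volume, and dollar bars.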


Binary file modified models/saved_models/autoencoder.h5
Binary file not shown.
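
The modified `autoencoder.h5` is presumably the retrained model produced by the `AutoEncoder` class used in `run.py` below. As a rough illustration only, here is a minimal Keras sketch of a symmetric dense autoencoder with a 20-unit bottleneck, assuming `AutoEncoder(20, n_features)` sets the encoding dimension and `build_model(100, 50, 50, 100)` sets the hidden-layer widths; the layer types, activations, and loss are assumptions, not the repository's confirmed architecture.

```python
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers

def build_autoencoder(n_features, encoding_dim=20):
    # Assumed architecture: 100 -> 50 -> 20 -> 50 -> 100 dense layers.
    inputs = keras.Input(shape=(n_features,))
    x = layers.Dense(100, activation="relu")(inputs)
    x = layers.Dense(50, activation="relu")(x)
    encoded = layers.Dense(encoding_dim, activation="relu")(x)
    x = layers.Dense(50, activation="relu")(encoded)
    x = layers.Dense(100, activation="relu")(x)
    outputs = layers.Dense(n_features, activation="linear")(x)

    autoencoder = keras.Model(inputs, outputs)
    encoder = keras.Model(inputs, encoded)
    autoencoder.compile(optimizer="adam", loss="mse")
    return autoencoder, encoder

# Tiny synthetic example; the real pipeline feeds MinMax-scaled feature windows.
x = np.random.rand(256, 40).astype("float32")
autoencoder, encoder = build_autoencoder(n_features=40)
autoencoder.fit(x, x, epochs=2, batch_size=32, verbose=0)
compressed = encoder.predict(x)  # 20-dimensional encoded features
```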
68 changes: 35 additions & 33 deletions run.py
@@ -7,12 +7,13 @@
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt

print('Processing data...')
preprocess = DataProcessing(0.8)
df = preprocess.make_features(file_path="price_bars/tick_bars.csv", window=20,
csv_path="autoencoder_data", save_csv=True)
fulldata, y_values, train_x, train_y, test_x, test_y = preprocess.make_train_test(df_x=df, df_y=None, window=1,
csv_path="autoencoder_data", save_csv=True)

# print('Processing data...')
# preprocess = DataProcessing(0.8)
# df = preprocess.make_features(file_path=f"price_bars/dollar_bars.csv", window=20,
# csv_path="autoencoder_data", save_csv=True)
# fulldata, y_values, train_x, train_y, test_x, test_y = preprocess.make_train_test(df_x=df, df_y=None, window=1,
# csv_path="autoencoder_data", save_csv=True)

print('Loading data...')
a_train_x = pd.read_csv('data/processed_data/autoencoder_data/train_x.csv', index_col=0)
@@ -27,28 +28,28 @@
x_train_a = scaler.fit_transform(a_train_x.iloc[:, 1:])
x_test_a = scaler.transform(a_test_x.iloc[:, 1:])

autoencoder = AutoEncoder(20, x_train_a.shape[1])
autoencoder.build_model(100, 50, 50, 100)
# autoencoder = AutoEncoder(20, x_train_a.shape[1])
# autoencoder.build_model(100, 50, 50, 100)

print('Training model...')
autoencoder.train_model(autoencoder.autoencoder, x_train_a, epochs=20, model_name='autoencoder')
# print('Training model...')
# autoencoder.train_model(autoencoder.autoencoder, x_train_a, epochs=20, model_name='autoencoder')

print('Testing model...')
autoencoder.test_model(autoencoder.autoencoder, x_test_a)
# print('Testing model...')
# autoencoder.test_model(autoencoder.autoencoder, x_test_a)

print('Encoding data...')
a_full_data = pd.read_csv('data/processed_data/autoencoder_data/full_x.csv', index_col=0)
a_scaled_full = pd.DataFrame(scaler.transform(a_full_data.iloc[:, 1:]))
autoencoder.encode_data(a_scaled_full, csv_path='nn_data/full_x.csv')
# print('Encoding data...')
# a_full_data = pd.read_csv('data/processed_data/autoencoder_data/full_x.csv', index_col=0)
# a_scaled_full = pd.DataFrame(scaler.transform(a_full_data.iloc[:, 1:]))
# autoencoder.encode_data(a_scaled_full, csv_path='nn_data/full_x.csv')

print('Processing data...')
preprocess = DataProcessing(0.8)
df1 = pd.read_csv("data/processed_data/nn_data/full_x.csv", index_col=0)
df2 = pd.read_csv('data/processed_data/autoencoder_data/full_y.csv', index_col=0)
fulldata, y_values, train_x, train_y, test_x, test_y = preprocess.make_train_test(df_x=df1, df_y=df2, window=1,
csv_path="rf_data", has_y=True, binary_y=True, save_csv=True)
y = pd.read_csv('data/processed_data/rf_data/full_y.csv', index_col=0)
preprocess.check_labels(y)
# print('Processing data...')
# preprocess = DataProcessing(0.8)
# df1 = pd.read_csv("data/processed_data/nn_data/full_x.csv", index_col=0)
# df2 = pd.read_csv('data/processed_data/autoencoder_data/full_y.csv', index_col=0)
# fulldata, y_values, train_x, train_y, test_x, test_y = preprocess.make_train_test(df_x=df1, df_y=df2, window=1,
# csv_path="rf_data", has_y=True, binary_y=True, save_csv=True)
# y = pd.read_csv('data/processed_data/rf_data/full_y.csv', index_col=0)
# preprocess.check_labels(y)

print('Loading data...')
train_x = pd.read_csv('data/processed_data/rf_data/train_x.csv', index_col=0)
@@ -79,16 +80,17 @@
# plt.plot(np.array(test_y))
# plt.show()

print(x_train.shape)
print(train_y.shape)
print(x_test.shape)
print(test_y.shape)
# print(x_train.shape)
# print(train_y.shape)
# print(x_test.shape)
# print(test_y.shape)

rfmodel = RFModel(x_train.shape[1])
rfmodel.make_model(50, -1, verbose=1)
rfmodel.make_model(300, -1, verbose=1)
rfmodel.train_model(x_train, train_y)
rfmodel.test_model(x_test, test_y)

# rfmodel = RFModel(x_train_a.shape[1])
# rfmodel.make_model(50, -1, verbose=1)
# rfmodel.train_model(x_train_a, train_y)
# rfmodel.test_model(x_test_a, test_y)
rfmodel = RFModel(x_train_a.shape[1])
rfmodel.make_model(300, -1, verbose=1)
rfmodel.train_model(x_train_a, train_y)
rfmodel.test_model(x_test_a, test_y)
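
As a rough guide to what the random-forest step now does, here is a minimal sketch on synthetic data. It assumes `RFModel` wraps scikit-learn's `RandomForestClassifier` and that `make_model(300, -1, verbose=1)` maps to `(n_estimators, n_jobs, verbose)`; that mapping and the synthetic data are illustrative assumptions rather than the repository's confirmed API.

```python
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import log_loss

# Synthetic stand-in data; the real script uses the processed CSVs loaded above.
rng = np.random.default_rng(0)
x_train, x_test = rng.normal(size=(500, 20)), rng.normal(size=(100, 20))
y_train, y_test = rng.integers(0, 2, 500), rng.integers(0, 2, 100)

# Assumed equivalent of rfmodel.make_model(300, -1, verbose=1) plus train/test.
rf = RandomForestClassifier(n_estimators=300, n_jobs=-1, verbose=1)
rf.fit(x_train, y_train)
print(f"Model log loss: {log_loss(y_test, rf.predict_proba(x_test)):.2f}")
```

Raising the first argument from 50 to 300, as this diff does, mainly trades extra training time for lower variance in the ensemble's probability estimates.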
