Commit acd69cd (initial commit, 0 parents)
Showing 23 changed files with 2,602 additions and 0 deletions.
.gitignore
@@ -0,0 +1,28 @@
__pycache__/
*.p
*.h5
*.hdf5
logs*
.ipynb_checkpoints
*.mp4
/data
.vscode

# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
LICENSE
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2019 Markus Frey

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
README.md
@@ -0,0 +1,45 @@
[![license](https://img.shields.io/github/license/mashape/apistatus.svg)](https://github.com/CYHSM/DeepInsight/blob/master/LICENSE.md)
![py36 status](https://img.shields.io/badge/python3.6-supported-green.svg)

## DeepInsight: A general framework for interpreting wide-band neural activity

DeepInsight is a toolbox for the analysis and interpretation of wide-band neural activity and can be applied to unsorted neural data. This means the traditional spike-sorting step can be omitted and the raw data used directly as input, providing a more objective way of measuring decoding performance.
![Model Architecture](media/model_architecture.png)

## Example Usage
```python
import deepinsight

# Load your electrophysiological or calcium-imaging data
(raw_data, raw_timestamps, output, output_timestamps, info) = deepinsight.util.tetrode.read_tetrode_data(fp_raw_file)

# Transform raw data to frequency domain
deepinsight.preprocess.preprocess_input(fp_deepinsight, raw_data, sampling_rate=info['sampling_rate'], channels=info['channels'])

# Prepare outputs
deepinsight.util.tetrode.preprocess_output(fp_deepinsight, raw_timestamps, output, output_timestamps, sampling_rate=info['sampling_rate'])

# Train the model
deepinsight.train.run_from_path(fp_deepinsight, loss_functions, loss_weights)

# Get loss and shuffled loss for influence plot
losses, output_predictions, indices = deepinsight.analyse.get_model_loss(fp_deepinsight, stepsize=10)
shuffled_losses = deepinsight.analyse.get_shuffled_model_loss(fp_deepinsight, axis=1, stepsize=10)

# Plot influence across behaviours
deepinsight.visualize.plot_residuals(fp_deepinsight, frequency_spacing=2)
```

See also the [jupyter notebook](notebooks/deepinsight_example_usage.ipynb) for a full example of decoding behaviours from tetrode CA1 recordings.

The following video shows the performance of the model trained on position (left), head direction (top right) and speed (bottom right):
![Model Performance](media/decoding_error.gif)

## Installation
For now, install DeepInsight with the following command:
```
pip install -e git+https://github.com/CYHSM/DeepInsight.git
```
A full pip installation and Colab integration will be available soon.
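The influence step in the README above yields `losses` (one loss per evaluated sample) and `shuffled_losses` (one loss series per shuffled frequency band). Below is a minimal sketch of one plausible way to collapse the two into per-band influence scores; the array shapes and the relative-loss definition are assumptions for illustration, not taken from this commit:

```python
import numpy as np

# Assumed shapes: losses (N, n_outputs), shuffled_losses (n_bands, N, n_outputs)
losses = np.random.rand(1000, 3)               # placeholder for real decoding losses
shuffled_losses = np.random.rand(16, 1000, 3)  # placeholder for shuffled losses

# Hypothetical influence: relative increase in mean loss when a band is shuffled
baseline = losses.mean(axis=0)                                    # (n_outputs,)
influence = (shuffled_losses.mean(axis=1) - baseline) / baseline  # (n_bands, n_outputs)
print(influence.shape)  # (16, 3): one score per frequency band and behaviour
```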
deepinsight/__init__.py
@@ -0,0 +1,6 @@
from . import util
from . import preprocess
from . import architecture
from . import train
from . import analyse
from . import visualize
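Because `deepinsight/__init__.py` pulls in every submodule, a single top-level import is enough to reach the whole pipeline, which is why the README can call `deepinsight.util.tetrode`, `deepinsight.train` and so on directly. A small sketch (module purposes inferred from their names and the README, not documented in this commit):

```python
import deepinsight

# All submodules are available after one import, as wired up above
deepinsight.preprocess  # raw data -> frequency domain
deepinsight.train       # model training
deepinsight.analyse     # losses and influence scores
deepinsight.visualize   # plots such as plot_residuals
```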
deepinsight/analyse.py
@@ -0,0 +1,245 @@
""" | ||
DeepInsight Toolbox | ||
© Markus Frey | ||
https://github.com/CYHSM/DeepInsight | ||
Licensed under MIT License | ||
""" | ||
import os | ||
|
||
import keras.backend as K | ||
import numpy as np | ||
import h5py | ||
|
||
from . import util | ||
|
||
|
||
def get_model_loss(fp_hdf_out, stepsize=1, shuffles=None):
    """
    Loops across cross-validated models and calculates loss and predictions for the full experiment length

    Parameters
    ----------
    fp_hdf_out : str
        File path to HDF5 file
    stepsize : int, optional
        Determines how many samples will be evaluated. 1 -> N samples evaluated,
        2 -> N/2 samples evaluated, etc..., by default 1
    shuffles : dict, optional
        If wavelets should be shuffled, important for calculating influence scores, by default None

    Returns
    -------
    losses : (N,1) array_like
        Loss between predicted and ground truth observation
    predictions : dict
        Dictionary with predictions for each behaviour, each item in dict has size (N, Z) with Z the dimensions of the sample (e.g. Z_position=2, Z_speed=1, ...)
    indices : (N,1) array_like
        Indices which were evaluated, important when taking stepsize unequal to 1
    """
    dirname = os.path.dirname(fp_hdf_out)
    filename = os.path.basename(fp_hdf_out)[0:-3]
    cv_results = []
    (_, _, _, opts) = util.hdf5.load_model_with_opts(dirname + '/models/' + filename + '_model_{}.h5'.format(0))
    loss_names = opts['loss_names']
    time_shift = opts['model_timesteps']
    for k in range(0, opts['num_cvs']):
        K.clear_session()
        # Find model for this cross-validation split
        model_path = dirname + '/models/' + filename + '_model_{}.h5'.format(k)
        # Load model and generators
        print('Evaluating model {}'.format(model_path))
        (model, training_generator, testing_generator, opts) = util.hdf5.load_model_with_opts(model_path)
        # -----------------------------------------------------------------------------------------------
        print('Getting loss, predictions and saliencies')
        if shuffles is not None:
            testing_generator = shuffle_wavelets(training_generator, testing_generator, shuffles)
        losses, predictions, indices = calculate_losses_from_generator(
            testing_generator, model, verbose=1, stepsize=stepsize)
        # -----------------------------------------------------------------------------------------------
        cv_results.append((losses, predictions, indices))
    cv_results = np.array(cv_results)
    # Reshape cv_results
    losses = np.concatenate(cv_results[:, 0], axis=0)
    predictions = {k: [] for k in loss_names}
    for out in cv_results[:, 1]:
        for p, name in zip(out, loss_names):
            predictions[name].append(p)
    for key, item in predictions.items():
        # Keep only the last timestep of each window; pad the start when every sample was evaluated
        tmp_output = np.concatenate(predictions[key], axis=0)[:, -1, :]
        if stepsize == 1:
            tmp_output = np.array([np.pad(l, [time_shift, 0], mode='constant', constant_values=[l[0], 0])
                                   for l in tmp_output.transpose()]).transpose()
        predictions[key] = tmp_output
    indices = np.concatenate(cv_results[:, 2], axis=0)
    # We only take the last timestep for decoding, so the decoder does not see any part of the future
    indices = indices + time_shift
    losses = losses[:, :, -1]
    if stepsize == 1:
        losses = np.array([np.pad(l, [time_shift, 0], mode='constant', constant_values=[l[0], 0])
                           for l in losses.transpose()]).transpose()
        indices = np.arange(0, losses.shape[0])
    # Also save to HDF5
    hdf5_file = h5py.File(fp_hdf_out, mode='a')
    for key, item in predictions.items():
        util.hdf5.create_or_update(hdf5_file, dataset_name="analysis/predictions/{}".format(key),
                                   dataset_shape=item.shape, dataset_type=np.float32, dataset_value=item)
    util.hdf5.create_or_update(hdf5_file, dataset_name="analysis/losses",
                               dataset_shape=losses.shape, dataset_type=np.float32, dataset_value=losses)
    util.hdf5.create_or_update(hdf5_file, dataset_name="analysis/indices",
                               dataset_shape=indices.shape, dataset_type=np.int64, dataset_value=indices)
    hdf5_file.close()

    return losses, predictions, indices

def get_shuffled_model_loss(fp_hdf_out, stepsize=1, axis=0):
    """
    Shuffles the wavelets and recalculates the error

    Parameters
    ----------
    fp_hdf_out : str
        File path to HDF5 file
    stepsize : int, optional
        Determines how many samples will be evaluated. 1 -> N samples evaluated,
        2 -> N/2 samples evaluated, etc..., by default 1
    axis : int, optional
        Which axis to shuffle

    Returns
    -------
    shuffled_losses : (N,1) array_like
        Loss between predicted and ground truth observation for shuffled wavelets on specified axis
    """
    if axis == 0:
        raise ValueError('Shuffling across time dimension (axis=0) not supported yet.')
    hdf5_file = h5py.File(fp_hdf_out, mode='r')
    tmp_wavelets_shape = hdf5_file['inputs/wavelets'].shape
    hdf5_file.close()
    shuffled_losses = []
    for s in range(0, tmp_wavelets_shape[axis]):
        if axis == 1:
            losses, _, _ = get_model_loss(fp_hdf_out, stepsize=stepsize, shuffles={'f': s})
        elif axis == 2:
            losses, _, _ = get_model_loss(fp_hdf_out, stepsize=stepsize, shuffles={'c': s})
        shuffled_losses.append(losses)
    shuffled_losses = np.array(shuffled_losses)
    # Also save to HDF5
    hdf5_file = h5py.File(fp_hdf_out, mode='a')
    util.hdf5.create_or_update(hdf5_file, dataset_name="analysis/influence/shuffled_losses",
                               dataset_shape=shuffled_losses.shape, dataset_type=np.float32, dataset_value=shuffled_losses)
    hdf5_file.close()

    return shuffled_losses

def calculate_losses_from_generator(tg, model, num_steps=None, stepsize=1, verbose=0):
    """
    Keras evaluate_generator only returns a scalar loss (mean), while predict_generator only returns the predictions but not the real labels
    TODO Make it batch size independent

    Parameters
    ----------
    tg : object
        Data generator
    model : object
        Keras model
    num_steps : int, optional
        How many steps should be evaluated, by default None (runs through full experiment)
    stepsize : int, optional
        Determines how many samples will be evaluated. 1 -> N samples evaluated,
        2 -> N/2 samples evaluated, etc..., by default 1
    verbose : int, optional
        Verbosity level

    Returns
    -------
    losses : (N,1) array_like
        Loss between predicted and ground truth observation
    predictions : dict
        Dictionary with predictions for each behaviour, each item in dict has size (N, Z) with Z the dimensions of the sample (e.g. Z_position=2, Z_speed=1, ...)
    indices : (N,1) array_like
        Indices which were evaluated, important when taking stepsize unequal to 1
    """
    # 0.) Parse inputs
    if num_steps is None:
        num_steps = len(tg)

    # 1.) Make a copy and adjust attributes
    tmp_dict = tg.__dict__.copy()
    if tg.batch_size != 1:
        tg.batch_size = 1
        tg.random_batches = False
        tg.shuffle = False
        tg.sample_size = tg.model_timesteps * tg.batch_size

    # 2.) Get output tensors
    sess = K.get_session()
    (_, test_out) = tg.__getitem__(0)
    real_tensor, calc_tensors = K.placeholder(), []
    for output_index in range(0, len(test_out)):
        prediction_tensor = model.outputs[output_index]
        loss_tensor = model.loss_functions[output_index](real_tensor, prediction_tensor)
        calc_tensors.append((prediction_tensor, loss_tensor))

    # 3.) Predict
    losses, predictions, indices = [], [], []
    for i in range(0, num_steps, stepsize):
        (in_tg, out_tg) = tg.__getitem__(i)
        indices.append(tg.cv_indices[i])
        loss, prediction = [], []
        for o in range(0, len(out_tg)):
            evaluated = sess.run(calc_tensors[o], feed_dict={model.input: in_tg, real_tensor: out_tg[o]})
            prediction.append(evaluated[0][0, ...])
            loss.append(evaluated[1][0, ...])  # Get rid of batch dimensions
        predictions.append(prediction)
        losses.append(loss)
        if verbose > 0 and not i % 50:
            print('{} / {}'.format(i, num_steps), end='\r')
    if verbose > 0:
        print('Performed {} evaluation steps'.format(num_steps // stepsize))
    losses, predictions, indices = np.array(losses), swap_listaxes(predictions), np.array(indices)
    tg.__dict__.update(tmp_dict)

    return losses, predictions, indices

def shuffle_wavelets(training_generator, testing_generator, shuffles):
    """
    Shuffles the wavelets of the training generator and assigns them to the testing generator

    Parameters
    ----------
    training_generator : object
        Data generator for training data
    testing_generator : object
        Data generator for testing data
    shuffles : dict
        Indicates which axis to shuffle and which index in the selected dimension, e.g. {'f': 5} shuffles frequency band 5

    Returns
    -------
    testing_generator : object
        Data generator for testing data with shuffled wavelets
    """
    rolled_wavelets = training_generator.wavelets.copy()
    for key, item in shuffles.items():
        if key == 'f':
            np.random.shuffle(rolled_wavelets[:, item, :])  # In place
        elif key == 'c':
            np.random.shuffle(rolled_wavelets[:, :, item])  # In place
        elif key == 't':
            np.random.shuffle(rolled_wavelets[item, :, :])  # In place
    testing_generator.wavelets = rolled_wavelets
    return testing_generator

def swap_listaxes(list_in):
    """
    Swaps the two outer levels of a nested list, e.g. from (steps, outputs) to (outputs, steps)
    """
    list_out = []
    for o in range(0, len(list_in[0])):
        list_out.append(np.array([out[o] for out in list_in]))
    return list_out
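Both `get_model_loss` and `get_shuffled_model_loss` also persist their results into the experiment's HDF5 file under `analysis/`. Below is a minimal sketch of reading them back with h5py; the dataset paths come from the code above, while the file path and the behaviour names are placeholders:

```python
import h5py

fp_deepinsight = 'experiment.h5'  # placeholder path to the preprocessed HDF5 file
with h5py.File(fp_deepinsight, mode='r') as f:
    losses = f['analysis/losses'][()]                              # written by get_model_loss
    indices = f['analysis/indices'][()]                            # evaluated sample indices
    shuffled_losses = f['analysis/influence/shuffled_losses'][()]  # written by get_shuffled_model_loss
    # One dataset per behaviour, e.g. position, head_direction, speed
    predictions = {key: f['analysis/predictions/{}'.format(key)][()]
                   for key in f['analysis/predictions']}
print(losses.shape, shuffled_losses.shape, sorted(predictions))
```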