Initial public commit
CYHSM committed Dec 11, 2019
0 parents commit acd69cd
Showing 23 changed files with 2,602 additions and 0 deletions.
28 changes: 28 additions & 0 deletions .gitignore
@@ -0,0 +1,28 @@
__pycache__/
*.p
*.h5
*.hdf5
logs*
.ipynb_checkpoints
*.mp4
/data
.vscode

# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
21 changes: 21 additions & 0 deletions LICENSE
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2019 Markus Frey

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
45 changes: 45 additions & 0 deletions README.md
@@ -0,0 +1,45 @@
[![license](https://img.shields.io/github/license/mashape/apistatus.svg)](https://github.com/CYHSM/DeepInsight/blob/master/LICENSE.md)
![py36 status](https://img.shields.io/badge/python3.6-supported-green.svg)

## DeepInsight: A general framework for interpreting wide-band neural activity

DeepInsight is a toolbox for the analysis and interpretation of wide-band neural activity and can be applied to unsorted neural data. This means the traditional spike-sorting step can be omitted and the raw data used directly as input, providing a more objective way of measuring decoding performance.
![Model Architecture](media/model_architecture.png)



## Example Usage
```python
import deepinsight

# Load your electrophysiological or calcium-imaging data
(raw_data, raw_timestamps, output, output_timestamps, info) = deepinsight.util.tetrode.read_tetrode_data(fp_raw_file)

# Transform raw data to frequency domain
deepinsight.preprocess.preprocess_input(fp_deepinsight, raw_data, sampling_rate=info['sampling_rate'], channels=info['channels'])

# Prepare outputs
deepinsight.util.tetrode.preprocess_output(fp_deepinsight, raw_timestamps, output, output_timestamps, sampling_rate=info['sampling_rate'])

# Train the model
deepinsight.train.run_from_path(fp_deepinsight, loss_functions, loss_weights)

# Get loss and shuffled loss for influence plot
losses, output_predictions, indices = deepinsight.analyse.get_model_loss(fp_deepinsight, stepsize=10)
shuffled_losses = deepinsight.analyse.get_shuffled_model_loss(fp_deepinsight, axis=1, stepsize=10)

# Plot influence across behaviours
deepinsight.visualize.plot_residuals(fp_deepinsight, frequency_spacing=2)
```
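The example assumes a few variables are defined beforehand. A minimal sketch with plausible values follows; the file paths are placeholders, and the loss choices are illustrative rather than prescribed by the toolbox:

```python
# Hypothetical setup for the example above -- adapt paths and losses to your data
fp_raw_file = 'data/session1.nvt'    # raw recording (placeholder path)
fp_deepinsight = 'data/session1.h5'  # HDF5 file DeepInsight reads from and writes to

# One loss function and weight per decoded behaviour (illustrative values)
loss_functions = {'position': 'euclidean_loss',
                  'head_direction': 'cyclical_mae_rad',
                  'speed': 'mae'}
loss_weights = {'position': 1, 'head_direction': 25, 'speed': 2}
```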

See also the [jupyter notebook](notebooks/deepinsight_example_usage.ipynb) for a full example of decoding behaviours from tetrode CA1 recordings.

The following video shows the performance of the model trained on position (left), head direction (top right) and speed (bottom right):
![Model Performance](media/decoding_error.gif)

## Installation
For now, install DeepInsight with the following command (the `#egg=` fragment is required for pip's editable VCS installs):
```
pip install -e git+https://github.com/CYHSM/DeepInsight.git#egg=deepinsight
```
A full pip installation and Colab integration will be available soon.
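Once installed, a quick import check (an illustrative smoke test, not an official verification step) confirms the package and its submodules resolve:

```python
# Minimal smoke test: these submodules are the ones exposed in deepinsight/__init__.py
import deepinsight

print(deepinsight.preprocess, deepinsight.train, deepinsight.analyse, deepinsight.visualize)
```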
6 changes: 6 additions & 0 deletions deepinsight/__init__.py
@@ -0,0 +1,6 @@
from . import util
from . import preprocess
from . import architecture
from . import train
from . import analyse
from . import visualize
245 changes: 245 additions & 0 deletions deepinsight/analyse.py
@@ -0,0 +1,245 @@
"""
DeepInsight Toolbox
© Markus Frey
https://github.com/CYHSM/DeepInsight
Licensed under MIT License
"""
import os

import keras.backend as K
import numpy as np
import h5py

from . import util


def get_model_loss(fp_hdf_out, stepsize=1, shuffles=None):
"""
    Loops across cross-validated models and calculates loss and predictions for the full experiment length
Parameters
----------
fp_hdf_out : str
File path to HDF5 file
stepsize : int, optional
Determines how many samples will be evaluated. 1 -> N samples evaluated,
2 -> N/2 samples evaluated, etc..., by default 1
shuffles : dict, optional
If wavelets should be shuffled, important for calculating influence scores, by default None
Returns
-------
losses : (N,1) array_like
Loss between predicted and ground truth observation
predictions : dict
Dictionary with predictions for each behaviour, each item in dict has size (N, Z) with Z the dimensions of the sample (e.g. Z_position=2, Z_speed=1, ...)
indices : (N,1) array_like
Indices which were evaluated, important when taking stepsize unequal to 1
"""
dirname = os.path.dirname(fp_hdf_out)
filename = os.path.basename(fp_hdf_out)[0:-3]
cv_results = []
(_, _, _, opts) = util.hdf5.load_model_with_opts(dirname + '/models/' + filename + '_model_{}.h5'.format(0))
loss_names = opts['loss_names']
time_shift = opts['model_timesteps']
for k in range(0, opts['num_cvs']):
K.clear_session()
# Find folders
model_path = dirname + '/models/' + filename + '_model_{}.h5'.format(k)
# Load model and generators
        print('Evaluating model {}'.format(model_path))
(model, training_generator, testing_generator, opts) = util.hdf5.load_model_with_opts(model_path)
# -----------------------------------------------------------------------------------------------
print('Getting loss, predictions and saliencies')
if shuffles is not None:
testing_generator = shuffle_wavelets(training_generator, testing_generator, shuffles)
losses, predictions, indices = calculate_losses_from_generator(
testing_generator, model, verbose=1, stepsize=stepsize)
# -----------------------------------------------------------------------------------------------
cv_results.append((losses, predictions, indices))
cv_results = np.array(cv_results)
# Reshape cv_results
losses = np.concatenate(cv_results[:, 0], axis=0)
predictions = {k: [] for k in loss_names}
for out in cv_results[:, 1]:
for p, name in zip(out, loss_names):
predictions[name].append(p)
for key, item in predictions.items():
        # Keep only the last timestep of each prediction window; at full resolution
        # (stepsize == 1) pad the start so outputs align with the raw recording
        tmp_output = np.concatenate(predictions[key], axis=0)[:, -1, :]
        if stepsize == 1:
            tmp_output = np.array([np.pad(l, [time_shift, 0], mode='constant', constant_values=[l[0], 0])
                                   for l in tmp_output.transpose()]).transpose()
predictions[key] = tmp_output
indices = np.concatenate(cv_results[:, 2], axis=0)
# We only take the last timestep for decoding, so decoder does not see any part of the future
indices = indices + time_shift
    losses = losses[:, :, -1]
    if stepsize == 1:
        losses = np.array([np.pad(l, [time_shift, 0], mode='constant', constant_values=[l[0], 0])
                           for l in losses.transpose()]).transpose()
        indices = np.arange(0, losses.shape[0])
# Also save to HDF5
hdf5_file = h5py.File(fp_hdf_out, mode='a')
for key, item in predictions.items():
util.hdf5.create_or_update(hdf5_file, dataset_name="analysis/predictions/{}".format(key),
dataset_shape=item.shape, dataset_type=np.float32, dataset_value=item)
util.hdf5.create_or_update(hdf5_file, dataset_name="analysis/losses",
dataset_shape=losses.shape, dataset_type=np.float32, dataset_value=losses)
util.hdf5.create_or_update(hdf5_file, dataset_name="analysis/indices",
dataset_shape=indices.shape, dataset_type=np.int64, dataset_value=indices)
hdf5_file.close()

return losses, predictions, indices


def get_shuffled_model_loss(fp_hdf_out, stepsize=1, axis=0):
"""
Shuffles the wavelets and recalculates error
Parameters
----------
fp_hdf_out : str
File path to HDF5 file
stepsize : int, optional
Determines how many samples will be evaluated. 1 -> N samples evaluated,
2 -> N/2 samples evaluated, etc..., by default 1
axis : int, optional
        Which axis to shuffle: 1 shuffles frequency bands, 2 shuffles channels (axis=0, time, is not supported), by default 0
Returns
-------
shuffled_losses : (N,1) array_like
Loss between predicted and ground truth observation for shuffled wavelets on specified axis
"""
if axis == 0:
raise ValueError('Shuffling across time dimension (axis=0) not supported yet.')
hdf5_file = h5py.File(fp_hdf_out, mode='r')
tmp_wavelets_shape = hdf5_file['inputs/wavelets'].shape
hdf5_file.close()
shuffled_losses = []
for s in range(0, tmp_wavelets_shape[axis]):
if axis == 1:
losses, _, _ = get_model_loss(fp_hdf_out, stepsize=stepsize, shuffles={'f': s})
elif axis == 2:
losses, _, _ = get_model_loss(fp_hdf_out, stepsize=stepsize, shuffles={'c': s})
shuffled_losses.append(losses)
shuffled_losses = np.array(shuffled_losses)
# Also save to HDF5
hdf5_file = h5py.File(fp_hdf_out, mode='a')
util.hdf5.create_or_update(hdf5_file, dataset_name="analysis/influence/shuffled_losses",
dataset_shape=shuffled_losses.shape, dataset_type=np.float32, dataset_value=shuffled_losses)
hdf5_file.close()

return shuffled_losses


def calculate_losses_from_generator(tg, model, num_steps=None, stepsize=1, verbose=0):
"""
    Keras evaluate_generator only returns a scalar loss (the mean), while predict_generator returns only the predictions, not the real labels
TODO Make it batch size independent
Parameters
----------
tg : object
Data generator
model : object
Keras model
num_steps : int, optional
How many steps should be evaluated, by default None (runs through full experiment)
stepsize : int, optional
Determines how many samples will be evaluated. 1 -> N samples evaluated,
2 -> N/2 samples evaluated, etc..., by default 1
verbose : int, optional
Verbosity level
Returns
-------
losses : (N,1) array_like
Loss between predicted and ground truth observation
predictions : dict
Dictionary with predictions for each behaviour, each item in dict has size (N, Z) with Z the dimensions of the sample (e.g. Z_position=2, Z_speed=1, ...)
indices : (N,1) array_like
Indices which were evaluated, important when taking stepsize unequal to 1
"""
# X.) Parse inputs
if num_steps is None:
num_steps = len(tg)

# 1.) Make a copy and adjust attributes
tmp_dict = tg.__dict__.copy()
if tg.batch_size != 1:
tg.batch_size = 1
tg.random_batches = False
tg.shuffle = False
tg.sample_size = tg.model_timesteps * tg.batch_size

# 2.) Get output tensors
sess = K.get_session()
(_, test_out) = tg.__getitem__(0)
real_tensor, calc_tensors = K.placeholder(), []
for output_index in range(0, len(test_out)):
prediction_tensor = model.outputs[output_index]
loss_tensor = model.loss_functions[output_index](real_tensor, prediction_tensor)
calc_tensors.append((prediction_tensor, loss_tensor))

# 3.) Predict
losses, predictions, indices = [], [], []
for i in range(0, num_steps, stepsize):
(in_tg, out_tg) = tg.__getitem__(i)
indices.append(tg.cv_indices[i])
loss, prediction = [], []
for o in range(0, len(out_tg)):
evaluated = sess.run(calc_tensors[o], feed_dict={model.input: in_tg, real_tensor: out_tg[o]})
prediction.append(evaluated[0][0, ...])
loss.append(evaluated[1][0, ...]) # Get rid of batch dimensions
predictions.append(prediction)
losses.append(loss)
if verbose > 0 and not i % 50:
print('{} / {}'.format(i, num_steps), end='\r')
if verbose > 0:
        print('Performed {} evaluation steps'.format(num_steps // stepsize))
losses, predictions, indices = np.array(losses), swap_listaxes(predictions), np.array(indices)
tg.__dict__.update(tmp_dict)

return losses, predictions, indices


def shuffle_wavelets(training_generator, testing_generator, shuffles):
"""
    Shuffles the wavelet array along the selected index of one dimension (used for calculating influence scores)
Parameters
----------
training_generator : object
Data generator for training data
testing_generator : object
Data generator for testing data
shuffles : dict
        Indicates which axis to shuffle and which index in the selected dimension, e.g. {'f': 5} shuffles frequency band 5 across time
Returns
-------
testing_generator : object
Data generator for testing data with shuffled wavelets
"""
rolled_wavelets = training_generator.wavelets.copy()
for key, item in shuffles.items():
if key == 'f':
np.random.shuffle(rolled_wavelets[:, item, :]) # In place
elif key == 'c':
np.random.shuffle(rolled_wavelets[:, :, item]) # In place
elif key == 't':
np.random.shuffle(rolled_wavelets[item, :, :]) # In place
testing_generator.wavelets = rolled_wavelets
return testing_generator


def swap_listaxes(list_in):
    """Swaps the first two levels of a nested list, so list_in[sample][output] becomes list_out[output][sample]."""
    list_out = []
for o in range(0, len(list_in[0])):
list_out.append(np.array([out[o] for out in list_in]))
return list_out