Commit our version of baselines

JustinKavalan committed Jun 19, 2020
1 parent 7dc889f commit ad1c35b
Showing 2 changed files with 129 additions and 16 deletions.
132 changes: 117 additions & 15 deletions baselines.ipynb
100644 → 100755
@@ -4,13 +4,13 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"# [FMA: A Dataset For Music Analysis](https://github.com/mdeff/fma)\n",
"# FMA: A Dataset For Music Analysis\n",
"\n",
"Michaël Defferrard, Kirell Benzi, Pierre Vandergheynst, Xavier Bresson, EPFL LTS2.\n",
"\n",
"## Baselines\n",
"\n",
"* This notebook evaluates standard classifiers from scikit-learn on the provided features.\n",
"* This notebook evalutates standard classifiers from scikit-learn on the provided features.\n",
"* Moreover, it evaluates Deep Learning models on both audio and spectrograms."
]
},
@@ -20,6 +20,9 @@
"metadata": {},
"outputs": [],
"source": [
"%load_ext autoreload\n",
"%autoreload 2\n",
"\n",
"import time\n",
"import os\n",
"\n",
@@ -28,6 +31,7 @@
"import numpy as np\n",
"import pandas as pd\n",
"import keras\n",
"import dotenv\n",
"from keras.layers import Activation, Dense, Conv1D, Conv2D, MaxPooling1D, Flatten, Reshape\n",
"\n",
"from sklearn.utils import shuffle\n",
@@ -44,7 +48,10 @@
"from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis\n",
"from sklearn.multiclass import OneVsRestClassifier\n",
"\n",
"import utils"
"import utils\n",
"\n",
"import librosa.display\n",
"import matplotlib.pyplot as plt"
]
},
{
@@ -53,11 +60,13 @@
"metadata": {},
"outputs": [],
"source": [
"AUDIO_DIR = os.environ.get('AUDIO_DIR')\n",
"dotenv.load_dotenv()\n",
"AUDIO_DIR = os.getenv(\"AUDIO_DIR\")\n",
"print(AUDIO_DIR)\n",
"\n",
"tracks = utils.load('data/fma_metadata/tracks.csv')\n",
"features = utils.load('data/fma_metadata/features.csv')\n",
"echonest = utils.load('data/fma_metadata/echonest.csv')\n",
"tracks = utils.load('tracks.csv')\n",
"features = utils.load('features.csv')\n",
"echonest = utils.load('echonest.csv')\n",
"\n",
"np.testing.assert_array_equal(features.index, tracks.index)\n",
"assert echonest.index.isin(tracks.index).all()\n",
@@ -187,6 +196,7 @@
" y_train, y_val, y_test, X_train, X_val, X_test = pre_process(tracks, features_all, fset, multi_label)\n",
" scores.loc[fset_name, 'dim'] = X_train.shape[1]\n",
" for clf_name, clf in classifiers.items(): # tqdm_notebook(classifiers.items(), desc='classifiers', leave=False):\n",
" print(clf_name)\n",
" t = time.process_time()\n",
" clf.fit(X_train, y_train)\n",
" score = clf.score(X_test, y_test)\n",
@@ -219,8 +229,8 @@
" 'DT': DecisionTreeClassifier(max_depth=5),\n",
" 'RF': RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1),\n",
" 'AdaBoost': AdaBoostClassifier(n_estimators=10),\n",
" 'MLP1': MLPClassifier(hidden_layer_sizes=(100,), max_iter=2000),\n",
" 'MLP2': MLPClassifier(hidden_layer_sizes=(200, 50), max_iter=2000),\n",
" 'MLP1': MLPClassifier(hidden_layer_sizes=(100,), max_iter=10000),\n",
" 'MLP2': MLPClassifier(hidden_layer_sizes=(200, 50), max_iter=10000),\n",
" 'NB': GaussianNB(),\n",
" 'QDA': QuadraticDiscriminantAnalysis(),\n",
"}\n",
@@ -475,7 +485,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"### 3.1 ConvNet on MFCC\n",
"# 3.1 ConvNet on MFCC\n",
"\n",
"* Architecture: [Automatic Musical Pattern Feature Extraction Using Convolutional Neural Network](http://www.iaeng.org/publication/IMECS2010/IMECS2010_pp546-550.pdf), Tom LH. Li, Antoni B. Chan and Andy HW. Chun\n",
"* Missing: track segmentation and majority voting.\n",
@@ -495,15 +505,69 @@
" def load(self, filename):\n",
" import librosa\n",
" x = self.raw_loader.load(filename)\n",
" x = x.astype(float)\n",
" # Each MFCC frame spans 23ms on the audio signal with 50% overlap with the adjacent frames.\n",
" mfcc = librosa.feature.mfcc(x, sr=22050, n_mfcc=13, n_fft=512, hop_length=256)\n",
" return mfcc\n",
"\n",
"loader = MfccLoader()\n",
"SampleLoader = utils.build_sample_loader(AUDIO_DIR, labels_onehot, loader)\n",
"SampleLoader = utils.build_sample_loader(AUDIO_DIR, labels_onehot2, loader)\n",
"loader.load(utils.get_audio_path(AUDIO_DIR, 2))[0].shape"
]
},
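The 23 ms / 50 % figures in the loader's comment follow directly from the STFT parameters (`n_fft=512`, `hop_length=256` at 22050 Hz). A quick sketch checking the frame geometry and the resulting MFCC shape on a silent 30-second clip (the clip length is an assumption matching the FMA snippets, not something the loader enforces):

```python
import numpy as np
import librosa

sr, n_fft, hop = 22050, 512, 256
print('frame length: {:.1f} ms'.format(1000 * n_fft / sr))  # ~23.2 ms
print('overlap: {:.0%}'.format(1 - hop / n_fft))            # 50%

x = np.zeros(30 * sr)  # silent stand-in for a 30-second clip
mfcc = librosa.feature.mfcc(y=x, sr=sr, n_mfcc=13, n_fft=n_fft, hop_length=hop)
print(mfcc.shape)  # (13, 2584): 13 coefficients, one column per hop
```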
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def generateData(train, audio_dir, labels, loader, batch_size=4):\n",
" batch_start = 0\n",
" batch_end = batch_size\n",
" data = np.copy(train)\n",
" \n",
" while True:\n",
" X = np.empty((batch_size, *loader.shape))\n",
" Y = np.empty((batch_size, labels.shape[1]), dtype=np.int)\n",
" if batch_start == 0:\n",
" np.random.shuffle(data)\n",
" \n",
" batch_current = batch_start\n",
" batch_cur_size = batch_size\n",
" if(batch_start + batch_size < data.size):\n",
" batch_start += batch_size\n",
" else: \n",
" batch_cur_size = data.size - batch_start\n",
" batch_start = 0\n",
" \n",
" cur_batch = data[batch_current: batch_current + batch_cur_size]\n",
" delIdx = []\n",
" for i, tid in enumerate(cur_batch):\n",
" try:\n",
" X[i] = loader.load(utils.get_audio_path(audio_dir, tid))\n",
" except Exception as e:\n",
" delIdx.append(i)\n",
" print(\"\\nERROR LOADING FILE at index\", i, \":\", e)\n",
" Y[i] = labels.loc[tid]\n",
" \n",
" if len(delIdx) > 0:\n",
" print(\"\\nDeleting the following failed cases:\", delIdx)\n",
" X = np.delete(X, delIdx, axis=0)\n",
" Y = np.delete(Y, delIdx, axis=0)\n",
" \n",
"# print((X, Y))\n",
" yield (X, Y)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"val_data=generateData(val, AUDIO_DIR, labels_onehot, loader, batch_size=6400)\n",
"print(val_data)"
]
},
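Printing `val_data` only shows the generator's repr; to sanity-check what it actually produces, pull one batch with `next()`. A minimal sketch using the generator defined above:

```python
# Draw a single batch from the (infinite) generator and inspect the shapes.
X_batch, Y_batch = next(val_data)
print(X_batch.shape, Y_batch.shape)  # up to 6400 rows, minus any failed loads
```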
{
"cell_type": "code",
"execution_count": null,
@@ -537,8 +601,28 @@
"optimizer = keras.optimizers.SGD(1e-3)#lr=0.01, momentum=0.9, nesterov=True)\n",
"#optimizer = keras.optimizers.Adam()#lr=1e-5)#\n",
"model.compile(optimizer, loss='categorical_crossentropy', metrics=['accuracy'])\n",
"\n",
"model.fit_generator(SampleLoader(train, batch_size=16), train.size, nb_epoch=20, **params)\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"model.fit_generator(generateData(train, AUDIO_DIR, labels_onehot, loader, batch_size=32), train.size, nb_epoch=20, \n",
" initial_epoch=0, verbose=1, **params,\n",
" validation_data=generateData(val, AUDIO_DIR, labels_onehot, loader, batch_size=800),\n",
" nb_val_samples=800)\n",
"#validation_data=generateData(val, AUDIO_DIR, labels_onehot2, loader, batch_size=16), nb_val_samples=16"
]
},
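The `nb_epoch` and `nb_val_samples` keywords are Keras 1 names; Keras 2 renamed them and switched `fit_generator` from sample counts to batch counts. A sketch of the equivalent Keras 2 call, assuming the same generators, a batch size of 32, and whatever queueing options `params` holds:

```python
import numpy as np

steps = int(np.ceil(train.size / 32))  # batches per epoch, not samples
model.fit_generator(
    generateData(train, AUDIO_DIR, labels_onehot, loader, batch_size=32),
    steps_per_epoch=steps,
    epochs=20,
    verbose=1,
    validation_data=generateData(val, AUDIO_DIR, labels_onehot, loader, batch_size=800),
    validation_steps=1,  # one 800-sample validation batch per epoch
    **params,
)
```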
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"loss = model.evaluate_generator(SampleLoader(val, batch_size=16), val.size, **params)\n",
"loss = model.evaluate_generator(SampleLoader(test, batch_size=16), test.size, **params)\n",
"#Y = model.predict_generator(loader, test.size, pickle_safe=True, nb_worker=NB_WORKER, max_q_size=5)\n",
@@ -547,7 +631,25 @@
]
}
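Because the model is compiled with `metrics=['accuracy']`, `evaluate_generator` returns a `[loss, accuracy]` pair rather than a single scalar, and the two calls in the last code cell overwrite the same `loss` variable. A sketch that keeps both results (same Keras 1-style signature as the cell above):

```python
val_loss, val_acc = model.evaluate_generator(SampleLoader(val, batch_size=16), val.size, **params)
test_loss, test_acc = model.evaluate_generator(SampleLoader(test, batch_size=16), test.size, **params)
print('val:  loss {:.3f}, accuracy {:.1%}'.format(val_loss, val_acc))
print('test: loss {:.3f}, accuracy {:.1%}'.format(test_loss, test_acc))
```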
],
"metadata": {},
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.4"
}
},
"nbformat": 4,
"nbformat_minor": 1
}
13 changes: 12 additions & 1 deletion utils.py
@@ -342,9 +342,20 @@ def __next__(self):
# print('queue', self.tids[batch_current], batch_size)
tids = np.array(self.tids[batch_current:batch_current+batch_size])

delIdx = []
for i, tid in enumerate(tids):
self.X[i] = self.loader.load(get_audio_path(audio_dir, tid))
try:
self.X[i] = self.loader.load(get_audio_path(audio_dir, tid))
except Exception as e:
delIdx.append(i)
print("\nERROR LOADING FILE at index", i, ":", e)
self.Y[i] = Y.loc[tid]


if len(delIdx) > 0:
print("\nDeleting the following failed cases:", delIdx)
self.X = np.delete(self.X, delIdx, axis=0)
self.Y = np.delete(self.Y, delIdx, axis=0)

with self.lock2:
while (batch_current - self.batch_rearmost.value) % self.tids.size > self.batch_size:
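One caveat with the hunk above: `np.delete` returns a new array, so rebinding `self.X` and `self.Y` permanently shrinks the pre-allocated batch buffers, and a later full-size batch would raise an `IndexError` on assignment. A sketch of a safer variant that leaves the shared buffers untouched (it assumes `__next__` goes on to return `self.X[:batch_size], self.Y[:batch_size]`):

```python
import numpy as np

def take_batch(X_buf, Y_buf, batch_size, delIdx):
    """Return one batch as trimmed copies, without mutating the shared buffers."""
    keep = np.setdiff1d(np.arange(batch_size), delIdx)
    return X_buf[keep].copy(), Y_buf[keep].copy()
```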
