Commit
Merge pull request facebookresearch#372 from facebookresearch/repro
adding ablation and reproducibility grid.
adefossez authored Aug 30, 2022
2 parents 5ee3ca3 + d43615b commit 62b9771
Showing 10 changed files with 141 additions and 6 deletions.
1 change: 1 addition & 0 deletions README.md
@@ -40,6 +40,7 @@ width="800px"></p>

See the [release notes](./docs/release.md) for more details.

- 30/08/2022: added reproducibility and ablation grids, along with an updated version of the paper.
- 17/08/2022: Releasing v3.0.5: Set split segment length to reduce memory. Compatible with pyTorch 1.12.
- 24/02/2022: Releasing v3.0.4: split into two stems (i.e. karaoke mode).
Export as float32 or int24.
2 changes: 1 addition & 1 deletion conf/config.yaml
@@ -184,7 +184,7 @@ dora:
slurm:
time: 4320
constraint: volta32gb
setup: ['module load cuda/11.0 cudnn/v8.0.3.33-cuda.11.0 NCCL/2.8.3-1-cuda.11.0']
setup: ['module load cudnn/v8.4.1.50-cuda.11.6 NCCL/2.11.4-6-cuda.11.6 cuda/11.6']

# Hydra config
hydra:
2 changes: 1 addition & 1 deletion demucs/grids/_explorers.py
@@ -8,7 +8,7 @@


class MyExplorer(Explorer):
test_metrics = ['nsdr']
test_metrics = ['nsdr', 'sdr_med']

def get_grid_metrics(self):
"""Return the metrics that should be displayed in the tracking table.
50 changes: 50 additions & 0 deletions demucs/grids/repro.py
@@ -0,0 +1,50 @@
# Copyright (c) Facebook, Inc. and its affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
"""
Easier training for reproducibility
"""

from ._explorers import MyExplorer


@MyExplorer
def explorer(launcher):
launcher.slurm_(
gpus=8,
time=3 * 24 * 60,
partition='devlab,learnlab')

launcher.bind_({'ema.epoch': [0.9, 0.95]})
launcher.bind_({'ema.batch': [0.9995, 0.9999]})
launcher.bind_({'epochs': 600})

base = {'model': 'demucs', 'demucs.dconv_mode': 0, 'demucs.gelu': False,
'demucs.lstm_layers': 2}
newt = {'model': 'demucs', 'demucs.normalize': True}
hdem = {'model': 'hdemucs'}
svd = {'svd.penalty': 1e-5, 'svd': 'base2'}

with launcher.job_array():
for model in [base, newt, hdem]:
sub = launcher.bind(model)
if model is base:
# Training the v2 Demucs on MusDB HQ
sub(epochs=360)
continue

# those two will be used in the repro_mdx_a bag of models.
sub(svd)
sub(svd, seed=43)
if model == newt:
# Ablation study
sub()
abl = sub.bind(svd)
abl({'ema.epoch': [], 'ema.batch': []})
abl({'demucs.dconv_lstm': 10})
abl({'demucs.dconv_attn': 10})
abl({'demucs.dconv_attn': 10, 'demucs.dconv_lstm': 10, 'demucs.lstm_layers': 2})
abl({'demucs.dconv_mode': 0})
abl({'demucs.gelu': False})
46 changes: 46 additions & 0 deletions demucs/grids/repro_ft.py
@@ -0,0 +1,46 @@
# Copyright (c) Facebook, Inc. and its affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
"""
Fine tuning experiments
"""

from ._explorers import MyExplorer
from ..train import main


@MyExplorer
def explorer(launcher):
launcher.slurm_(
gpus=8,
time=300,
partition='devlab,learnlab')

# Mus
launcher.slurm_(constraint='volta32gb')

grid = "repro"
folder = main.dora.dir / "grids" / grid

for sig in folder.iterdir():
if not sig.is_symlink():
continue
xp = main.get_xp_from_sig(sig)
xp.link.load()
if len(xp.link.history) != xp.cfg.epochs:
continue
sub = launcher.bind(xp.argv, [f'continue_from="{xp.sig}"'])
sub.bind_({'ema.epoch': [0.9, 0.95], 'ema.batch': [0.9995, 0.9999]})
sub.bind_({'test.every': 1, 'test.sdr': True, 'epochs': 4})
sub.bind_({'dset.segment': 28, 'dset.shift': 2})
sub.bind_({'batch_size': 32})
auto = {'dset': 'auto_mus'}
auto.update({'augment.remix.proba': 0, 'augment.scale.proba': 0,
'augment.shift_same': True})
sub.bind_(auto)
sub.bind_({'batch_size': 16})
sub.bind_({'optim.lr': 1e-4})
sub.bind_({'model_segment': 44})
sub()
4 changes: 4 additions & 0 deletions demucs/remote/files.txt
@@ -14,3 +14,7 @@ b72baf4e-8778635e.th
464b36d7-e5a9386e.th
7fd6ef75-a905dd85.th
83fc094f-4a16d450.th
1ef250f1-592467ce.th
902315c2-b39ce9c9.th
9a6b4851-03af0aa6.th
fa0cb7f9-100d8bf4.th
2 changes: 2 additions & 0 deletions demucs/remote/repro_mdx_a.yaml
@@ -0,0 +1,2 @@
models: ['9a6b4851', '1ef250f1', 'fa0cb7f9', '902315c2']
segment: 44
2 changes: 2 additions & 0 deletions demucs/remote/repro_mdx_a_hybrid_only.yaml
@@ -0,0 +1,2 @@
models: ['fa0cb7f9', '902315c2', 'fa0cb7f9', '902315c2']
segment: 44
2 changes: 2 additions & 0 deletions demucs/remote/repro_mdx_a_time_only.yaml
@@ -0,0 +1,2 @@
models: ['9a6b4851', '9a6b4851', '1ef250f1', '1ef250f1']
segment: 44
36 changes: 32 additions & 4 deletions docs/training.md
@@ -141,21 +141,33 @@ If you want to combine multiple models, potentially with different weights for e
demucs --repo ./release_models -n my_bag my_track.mp3
```

You can also evaluate your bag of models with the following command:
## Model evaluation

You can evaluate any pre-trained model or bag of models using the following command:
```bash
python3 -m tools.test_pretrained -n NAME_OF_MODEL [EXTRA ARGS]
```
where `NAME_OF_MODEL` is either the name of a bag (e.g. `mdx`, `repro_mdx_a`),
or the Dora signature of a single model from one of the bags. You can pass `EXTRA ARGS` to customize
the test options, such as the number of random shifts (e.g. `test.shifts=2`). This will compute the old-style
SDR and can take quite a bit of time.

For custom models that were trained locally, you will need to indicate that you wish
to use the local model repository with the `--repo ./release_models` flag, e.g.,
```bash
python3 -m tools.test_pretrained --repo ./release_models -n my_bag
```

## Model Zoo


**About Wiener filtering**: It came to my attention that, in fact, none of the models were trained with Wiener filtering. In particular, using Wiener filtering at train time was too slow, while using it only at test time led to worse performance, as it changed the output of the spectrogram branch without giving the waveform branch a chance to adapt. I will update the paper and the code documentation to make that clear.

Here is a short description of the models used for the MDX submission, either Track A (MusDB HQ only)
or Track B (extra training data allowed). Training happens in two stages, the second stage
being fine-tuning on the automix-generated dataset.
All the fine-tuned models are available on our AWS repository
(you can retrieve them with `demucs.pretrained.get_model(SIG)`). The bags of models are available
via `demucs.pretrained.get_model(NAME)`, with `NAME` being either `mdx` (for Track A) or `mdx_extra`
(for Track B).
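
As a small sketch of this API (assuming the `demucs` package is installed; the track-to-name mapping below just restates the bag names given above):

```python
# Bag names for the two MDX tracks, as listed above.
TRACK_BAGS = {'A': 'mdx', 'B': 'mdx_extra'}

def load_bag(track):
    """Load the pretrained bag of models for an MDX track ('A' or 'B').

    The import is kept local so the name mapping above can be used
    without demucs installed; get_model downloads weights on first use.
    """
    from demucs.pretrained import get_model
    return get_model(TRACK_BAGS[track])
```

Calling `load_bag('A')` is then equivalent to `demucs.pretrained.get_model('mdx')`; single models can likewise be fetched by signature, e.g. `get_model(SIG)`.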

### Track A

@@ -207,5 +219,21 @@ Similarly you can do (those will contain a few extra lines, for training without
dora grid mdx_extra --dry_run --init
```
### Reproducibility and Ablation

I updated the paper to report numbers with a more homogeneous setup than the one used for the competition.
On MusDB HQ, I still need to use a combination of time-only and hybrid models to achieve the best performance.
The experiments are provided in the grids [repro.py](../demucs/grids/repro.py) and
[repro_ft.py](../demucs/grids/repro_ft.py), the latter for the fine-tuning on the realistic mix datasets.
The new bag of models reaches an SDR of 7.64 (vs. 7.68 for the original Track A model). It uses
2 time-only models trained with residual branches, local attention and the SVD penalty,
along with 2 hybrid models with the same features, using the CaC representation.
We average the performance of all the models with the same weight over all sources, unlike
what was done for the original Track A model. We trained for 600 epochs, against 360 before.
The new bag of models is available as part of the pretrained models as `repro_mdx_a`.
The time-only bag is named `repro_mdx_a_time_only`, and the hybrid-only one `repro_mdx_a_hybrid_only`.
Check out the paper for more information on the training.
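
The uniform averaging over models described above (same weight for every model, over all sources) can be illustrated with a toy sketch; this is only an illustration of the idea, not the actual demucs bagging code:

```python
def bag_average(estimates):
    """Average per-source estimates from several models with equal weight.

    `estimates` is a list of dicts mapping source name -> waveform
    (here, plain lists of samples for simplicity). Every model
    contributes with the same weight 1/n to every source.
    """
    n = len(estimates)
    return {source: [sum(samples) / n for samples in
                     zip(*(est[source] for est in estimates))]
            for source in estimates[0]}

# Two toy "models", each predicting two sources as short waveforms:
model_a = {'drums': [1.0, 2.0], 'bass': [0.0, 4.0]}
model_b = {'drums': [3.0, 0.0], 'bass': [2.0, 0.0]}
bagged = bag_average([model_a, model_b])
# bagged == {'drums': [2.0, 1.0], 'bass': [1.0, 2.0]}
```

In the real bags, the estimates are tensors per source, but the equal-weight combination is the same idea.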
[dora]: https://github.com/facebookresearch/dora
