Merge pull request facebookresearch#420 from facebookresearch/release_v4

preparing official v4 release
jjreisfl · Dec 7, 2022 · 4847eb4 · 4847eb4
2 parents 20d63ef + 2496b8f
commit 4847eb4
Show file tree

Hide file tree

Showing 7 changed files with 26 additions and 18 deletions.
diff --git a/README.md b/README.md
@@ -32,6 +32,8 @@ commands described hereafter with `-n htdemucs_ft`.
 The single, non fine-tuned model is provided as `-n htdemucs`, and the retrained baseline
 as `-n hdemucs_mmi`. The Sparse Hybrid Transformer model described in our paper is not provided as it
 requires custom CUDA code that is not ready for release yet.
+We are also releasing an experimental 6 sources model, that adds a `guitar` and `piano` source.
+Quick testing seems to show okay quality for `guitar`, but a lot of bleeding and artifacts for the `piano` source.
 
 
 <p align="center">
@@ -46,6 +48,8 @@ width="800px"></p>
 
 See the [release notes](./docs/release.md) for more details.
 
+- 07/12/2022: Demucs v4 now on PyPI. **htdemucs** model now used by default. Also releasing
+    a 6 sources model (adding `guitar` and `piano`, although the latter doesn't work so well at the moment).
 - 16/11/2022: Added the new **Hybrid Transformer Demucs v4** models.
 	Adding support for the [torchaudio implementation of HDemucs](https://pytorch.org/audio/stable/tutorials/hybrid_demucs_tutorial.html).
 - 30/08/2022: added reproducibility and ablation grids, along with an updated version of the paper.
@@ -58,13 +62,6 @@ See the [release notes](./docs/release.md) for more details.
 	on all sources. This is the model that won Sony MDX challenge.
 - 11/05/2021: Adding support for MusDB-HQ and arbitrary wav set, for the MDX challenge. For more information
 on joining the challenge with Demucs see [the Demucs MDX instructions](docs/mdx.md)
-- 28/04/2021: **Demucs v2**, with extra augmentation and DiffQ based quantization.
-  **EVERYTHING WILL BREAK**, please restart from scratch following the instructions hereafter.
-  This version also adds overlap between prediction frames, with linear transition from one to the next,
-  which should prevent sudden changes at frame boundaries. Also, Demucs is now on PyPI, so for separation
-  only, installation is as easy as `pip install demucs` :)
-- 13/04/2020: **Demucs released under MIT**: We are happy to release Demucs under the MIT licence.
-    We hope that this will broaden the impact of this research to new applications.
 
 
 ## Comparison with other models
@@ -122,9 +119,6 @@ For bleeding edge versions, you can install directly from this repo using
 python3 -m pip install -U git+https://github.com/facebookresearch/demucs#egg=demucs
 ```
 
-**For Hybrid Transformer Demucs,** you must install the bleeding edge version and use either
-`-n htdemucs` or `-n htdemucs_ft`.
-
 Advanced OS support is provided on the following pages, **you must read the page for your OS before posting an issue**:
 - **If you are using Windows:** [Windows support](docs/windows.md).
 - **If you are using MAC OS X:** [Mac OS X support](docs/mac.md).
@@ -215,15 +209,17 @@ You can also try to reduce the volume of the input mixture before feeding it to
 
 Other pre-trained models can be selected with the `-n` flag.
 The list of pre-trained models is:
-- `htdemucs`: first version of Hybrid Transformer Demucs. Trained on MusDB + 800 songs.
+- `htdemucs`: first version of Hybrid Transformer Demucs. Trained on MusDB + 800 songs. Default model.
 - `htdemucs_ft`: fine-tuned version of `htdemucs`, separation will take 4 times more time
     but might be a bit better. Same training set as `htdemucs`.
+- `htdemucs_6s`: 6 sources version of `htdemucs`, with `piano` and `guitar` being added as sources.
+    Note that the `piano` source is not working great at the moment.
 - `hdemucs_mmi`: Hybrid Demucs v3, retrained on MusDB + 800 songs.
 - `mdx`: trained only on MusDB HQ, winning model on track A at the [MDX][mdx] challenge.
 - `mdx_extra`: trained with extra training data (including MusDB test set), ranked 2nd on the track B
     of the [MDX][mdx] challenge.
 - `mdx_q`, `mdx_extra_q`: quantized version of the previous models. Smaller download and storage
-    but quality can be slightly worse. `mdx_extra_q` is the default model used.
+    but quality can be slightly worse.
 - `SIG`: where `SIG` is a single model from the [model zoo](docs/training.md#model-zoo).
 
 The `--two-stems=vocals` option allows separating vocals from the rest (e.g. karaoke mode).

diff --git a/demucs/__init__.py b/demucs/__init__.py
@@ -4,4 +4,4 @@
 # This source code is licensed under the license found in the
 # LICENSE file in the root directory of this source tree.
 
-__version__ = "4.0.0a1"
+__version__ = "4.0.0"
diff --git a/demucs/audio.py b/demucs/audio.py
@@ -244,7 +244,7 @@ def save_audio(wav, path, samplerate, bitrate=320, clip='rescale',
     path = Path(path)
     suffix = path.suffix.lower()
     if suffix == ".mp3":
-        encode_mp3(wav, path, samplerate, bitrate)
+        encode_mp3(wav, path, samplerate, bitrate, verbose=True)
     elif suffix == ".wav":
         if as_float:
             bits_per_sample = 32

diff --git a/demucs/pretrained.py b/demucs/pretrained.py
@@ -10,7 +10,7 @@
 from pathlib import Path
 import typing as tp
 
-from dora.log import fatal
+from dora.log import fatal, bold
 
 from .hdemucs import HDemucs
 from .repo import RemoteRepo, LocalRepo, ModelOnlyRepo, BagOnlyRepo, AnyModelRepo, ModelLoadingError  # noqa
@@ -20,6 +20,7 @@
 REMOTE_ROOT = Path(__file__).parent / 'remote'
 
 SOURCES = ["drums", "bass", "other", "vocals"]
+DEFAULT_MODEL = 'htdemucs'
 
 
 def demucs_unittest():
@@ -30,7 +31,7 @@ def demucs_unittest():
 def add_model_flags(parser):
     group = parser.add_mutually_exclusive_group(required=False)
     group.add_argument("-s", "--sig", help="Locally trained XP signature.")
-    group.add_argument("-n", "--name", default="mdx_extra_q",
+    group.add_argument("-n", "--name", default=None,
                        help="Pretrained model name or signature. Default is mdx_extra_q.")
     parser.add_argument("--repo", type=Path,
                         help="Folder containing all pre-trained models for use with -n.")
@@ -79,4 +80,10 @@ def get_model_from_args(args):
     """
     Load local model package or pre-trained model.
     """
+    if args.name is None:
+        args.name = DEFAULT_MODEL
+        print(bold("Important: the default model was recently changed to `htdemucs`"),
+              "the latest Hybrid Transformer Demucs model. In some cases, this model can "
+              "actually perform worse than previous models. To get back the old default model "
+              "use `-n mdx_extra_q`.")
     return get_model(name=args.name, repo=args.repo)
diff --git a/demucs/remote/files.txt b/demucs/remote/files.txt
@@ -27,4 +27,6 @@ f7e0c4bc-ba3fe64a.th
 d12395a8-e57c48e6.th
 92cfc3b6-ef3bcb9c.th
 04573f0d-f3cf25b2.th
-75fc33f5-1941ce65.th
+75fc33f5-1941ce65.th
+# Experimental 6 sources model
+5c90dfd2-34c22ccb.th
diff --git a/demucs/remote/htdemucs_6s.yaml b/demucs/remote/htdemucs_6s.yaml
@@ -0,0 +1 @@
+models: ['5c90dfd2']
diff --git a/docs/release.md b/docs/release.md
@@ -1,12 +1,14 @@
 # Release notes for Demucs
 
 
-## V4.0.0a, TBC
+## V4.0.0, 7th of December 2022
 
 Adding hybrid transformer Demucs model.
 
 Added support for [Torchaudio implementation of HDemucs](https://pytorch.org/audio/main/tutorials/hybrid_demucs_tutorial.html), thanks @skim0514.
 
+Added experimental 6 sources model `htdemucs_6s` (`drums`, `bass`, `other`, `vocals`, `piano`, `guitar`).
+
 ## V3.0.6, 16th of November 2022
 
 Option to customize output path of stems (@CarlGao4)