tacotron DDC LJSpeech recipe

kugouming · May 17, 2021 · d1b4699 · d1b4699
1 parent 34a42d3
commit d1b4699
Show file tree

Hide file tree

Showing 5 changed files with 22 additions and 8 deletions.
diff --git a/.gitignore b/.gitignore
@@ -132,4 +132,3 @@ notebooks/data/*
 TTS/tts/layers/glow_tts/monotonic_align/core.c
 .vscode-upload.json
 temp_build/*
-recipes/*
diff --git a/TTS/tts/configs/tacotron_config.py b/TTS/tts/configs/tacotron_config.py
@@ -169,7 +169,8 @@ class TacotronConfig(BaseTTSConfig):
     postnet_ssim_alpha: float = 0.25
     ga_alpha: float = 5.0
 
-
     def check_values(self):
         if self.gradual_training:
-            assert self.gradual_training[0][1] == self.r, f"[!] the first scheduled gradual training `r` must be equal to the model's `r` value. {self.gradual_training[0][1]} vs {self.r}"
+            assert (
+                self.gradual_training[0][1] == self.r
+            ), f"[!] the first scheduled gradual training `r` must be equal to the model's `r` value. {self.gradual_training[0][1]} vs {self.r}"
diff --git a/recipes/README.md b/recipes/README.md
@@ -0,0 +1,13 @@
+# 🐸💬 TTS Training Recipes
+
+TTS recipes are intended to host bash scripts that run all the necessary steps to train a TTS model with a particular dataset.
+
+Run each script from the root TTS folder as follows:
+
+```console
+$ bash ./recipes/<dataset>/<model>/run.sh
+```
+
+All the outputs are held under the recipe directory unless you change the paths in the bash script.
+
+If you train a new model using TTS, feel free to share your training to expand the list of recipes.
diff --git a/recipes/ljspeech/tacotron/run.sh → recipes/ljspeech/tacotron2-DDC/run.sh b/recipes/ljspeech/tacotron/run.sh → recipes/ljspeech/tacotron2-DDC/run.sh
@@ -1,4 +1,5 @@
 #!/bin/bash
+# take the script's parent directory to prefix all the output paths.
 RUN_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
 echo $RUN_DIR
 # download LJSpeech dataset
@@ -12,10 +13,10 @@ tail -n 1100 LJSpeech-1.1/metadata_shuf.csv > LJSpeech-1.1/metadata_val.csv
 mv LJSpeech-1.1 $RUN_DIR/
 rm LJSpeech-1.1.tar.bz2
 # compute dataset mean and variance for normalization
-python TTS/bin/compute_statistics.py $RUN_DIR/tacotron2-DCA.json $RUN_DIR/scale_stats.npy --data_path $RUN_DIR/LJSpeech-1.1/wavs/
+python TTS/bin/compute_statistics.py $RUN_DIR/tacotron2-DDC.json $RUN_DIR/scale_stats.npy --data_path $RUN_DIR/LJSpeech-1.1/wavs/
 # training ....
 # change the GPU id if needed
 CUDA_VISIBLE_DEVICES="0" python TTS/bin/train_tacotron.py --config_path $RUN_DIR/tacotron2-DDC.json \
-                                                          --output_path $RUN_DIR  \
+                                                          --coqpit.output_path $RUN_DIR  \
                                                           --coqpit.datasets.0.path $RUN_DIR/LJSpeech-1.1/    \
                                                           --coqpit.audio.stats_path $RUN_DIR/scale_stats.npy \
diff --git a/recipes/ljspeech/tacotron/tacotron2-DDC.json → ...ljspeech/tacotron2-DDC/tacotron2-DDC.json b/recipes/ljspeech/tacotron/tacotron2-DDC.json → ...ljspeech/tacotron2-DDC/tacotron2-DDC.json
@@ -37,11 +37,10 @@
         "gst_num_style_tokens": 10
     },
     "model": "Tacotron2",
-    "run_name": "ljspeech-dcattn",
-    "run_description": "tacotron2 with dynamic convolution attention.",
+    "run_name": "ljspeech-ddc",
+    "run_description": "tacotron2 with double decoder consistency.",
     "batch_size": 64,
     "eval_batch_size": 16,
-    "r": 2,
     "mixed_precision": true,
     "loss_masking": true,
     "decoder_loss_alpha": 0.25,
@@ -69,6 +68,7 @@
     "double_decoder_consistency": true,
     "ddc_r": 6,
     "attention_norm": "sigmoid",
+    "r": 6,
     "gradual_training": [[0, 6, 64], [10000, 4, 32], [50000, 3, 32], [100000, 2, 32]],
     "stopnet": true,
     "separate_stopnet": true,