Merge branch 'master' of github.com:espnet/espnet into feature/espnet2_fastspeech2
kan-bayashi committed Jul 26, 2020
2 parents 7425014 + cf12d0e commit c16d935
Showing 190 changed files with 8,147 additions and 987 deletions.
3 changes: 2 additions & 1 deletion ci/install.sh
@@ -6,7 +6,7 @@ set -euo pipefail

$CXX -v

(
(
set -euo pipefail
cd tools
# To suppress the installation for Kaldi
@@ -16,6 +16,7 @@ $CXX -v
else
make PYTHON="$(which python)" TH_VERSION="${TH_VERSION}"
fi
make moses.done
rm kaldi.done
)
if [ -z "${PS1:-}" ]; then
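The added `make moses.done` step installs the Moses tooling that the new ST/MT integration tests below rely on. As a rough, hypothetical sketch of what that target is assumed to do (this is not the actual tools/Makefile recipe):

(
  # assumption: moses.done fetches the Moses scripts under tools/ and leaves a stamp file
  cd tools
  git clone https://github.com/moses-smt/mosesdecoder.git moses
  touch moses.done
)
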
40 changes: 38 additions & 2 deletions ci/test_integration.sh
@@ -77,6 +77,42 @@ echo "==== ASR Mix (backend=pytorch, model=transformer) ==="
rm -rf exp tensorboard dump data
cd "${cwd}" || exit 1

# test st recipe
cd ./egs/mini_an4/st1 || exit 1
echo "==== ST (backend=pytorch) ==="
./run.sh
echo "==== ST (backend=pytorch asr0.3) ==="
./run.sh --stage 4 --train_config conf/train_asr0.3.yaml
echo "==== ST (backend=pytorch ctc asr0.3) ==="
./run.sh --stage 4 --train_config conf/train_ctc_asr0.3.yaml
echo "==== ST (backend=pytorch mt0.3) ==="
./run.sh --stage 4 --train_config conf/train_mt0.3.yaml
echo "==== ST (backend=pytorch asr0.2 mt0.2) ==="
./run.sh --stage 4 --train_config conf/train_asr0.2_mt0.2.yaml
echo "==== ST (backend=pytorch, model=transformer) ==="
./run.sh --stage 4 --train_config conf/train_transformer.yaml
echo "==== ST (backend=pytorch asr0.3, model=transformer) ==="
./run.sh --stage 4 --train_config conf/train_transformer_asr0.3.yaml
echo "==== ST (backend=pytorch ctc asr0.3, model=transformer) ==="
./run.sh --stage 4 --train_config conf/train_transformer_ctc_asr0.3.yaml
echo "==== ST (backend=pytorch mt0.3, model=transformer) ==="
./run.sh --stage 4 --train_config conf/train_transformer_mt0.3.yaml
echo "==== ST (backend=pytorch asr0.2 mt0.2, model=transformer) ==="
./run.sh --stage 4 --train_config conf/train_transformer_asr0.2_mt0.2.yaml
# Remove generated files in order to reduce the disk usage
rm -rf exp tensorboard dump data
cd "${cwd}" || exit 1

# test mt recipe
cd ./egs/mini_an4/mt1 || exit 1
echo "==== MT (backend=pytorch) ==="
./run.sh
echo "==== MT (backend=pytorch, model=transformer) ==="
./run.sh --stage 4 --train_config conf/train_transformer.yaml
# Remove generated files in order to reduce the disk usage
rm -rf exp tensorboard dump data
cd "${cwd}" || exit 1

# test tts recipe
cd ./egs/mini_an4/tts1 || exit 1
echo "==== TTS (backend=pytorch) ==="
@@ -100,7 +136,7 @@ done
for t in ${feats_types}; do
for t2 in ${token_types}; do
echo "==== feats_type=${t}, token_types=${t2} ==="
./run.sh --ngpu 0 --stage 6 --stop-stage 13 --feats-type "${t}" --token-type "${t2}" \
./run.sh --ngpu 0 --stage 6 --stop-stage 13 --skip-upload false --feats-type "${t}" --token-type "${t2}" \
--asr-args "--max_epoch=1" --lm-args "--max_epoch=1"
done
done
@@ -115,7 +151,7 @@ echo "==== [ESPnet2] TTS ==="
feats_types="raw fbank stft"
for t in ${feats_types}; do
echo "==== feats_type=${t} ==="
./run.sh --ngpu 0 --stage 2 --stop-stage 8 --feats-type "${t}" --train-args "--max_epoch 1"
./run.sh --ngpu 0 --stage 2 --stop-stage 8 --skip-upload false --feats-type "${t}" --train-args "--max_epoch 1"
done
# Remove generated files in order to reduce the disk usage
rm -rf exp dump data
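As a hedged local-debugging sketch, one of the ST cases added above could presumably be run in isolation from the repository root, assuming tools/ has already been installed:

# illustrative only: reproduce a single ST configuration outside CI
cd egs/mini_an4/st1
./run.sh --stage 4 --train_config conf/train_transformer_asr0.3.yaml
cd ../../..
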
2 changes: 1 addition & 1 deletion ci/test_shell.sh
@@ -21,7 +21,7 @@ set -euo pipefail
echo "=== run shellcheck ==="
find ci utils doc egs2/TEMPLATE/*/scripts egs2/TEMPLATE/*/setup.sh tools/*.sh -name "*.sh" -printf "=> %p\n" -execdir shellcheck -Calways -x -e SC2001 -e SC1091 -e SC2086 {} \; | tee check_shellcheck
find egs2/*/*/local/data.sh -printf "=> %p\n" -execdir sh -c 'cd .. ; shellcheck -Calways -x -e SC2001 -e SC1091 -e SC2086 local/$1 ; ' -- {} \; | tee check_shellcheck
find egs egs2 \( -name "run.sh" -o -name asr.sh -o -name tts.sh \) -printf "=> %p\n" -execdir shellcheck -Calways -x -e SC2001 -e SC1091 -e SC2086 {} \; | tee -a check_shellcheck
find egs egs2 \( -name "run.sh" -o -name asr.sh -o -name tts.sh -o -name enh.sh \) -printf "=> %p\n" -execdir shellcheck -Calways -x -e SC2001 -e SC1091 -e SC2086 {} \; | tee -a check_shellcheck

if grep -q "SC[0-9]\{4\}" check_shellcheck; then
echo "[ERROR] shellcheck failed"
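The find pattern now also picks up enh.sh recipe scripts. An equivalent manual spot-check with the same shellcheck flags might look like the following sketch (the target path is only illustrative):

shellcheck -Calways -x -e SC2001 -e SC1091 -e SC2086 egs2/TEMPLATE/enh1/enh.sh
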
2 changes: 1 addition & 1 deletion egs/an4/tts1/run.sh
@@ -11,7 +11,7 @@ backend=pytorch
stage=-1
stop_stage=100
ngpu=1 # number of gpus ("0" uses cpu, otherwise use gpu)
nj=32 # numebr of parallel jobs
nj=32 # number of parallel jobs
dumpdir=dump # directory to dump full features
verbose=1 # verbose option (if set > 0, get more log)
N=0 # number of minibatches to be used (mainly for debugging). "0" uses all minibatches.
2 changes: 1 addition & 1 deletion egs/arctic/tts1/run.sh
@@ -11,7 +11,7 @@ backend=pytorch
stage=-1
stop_stage=100
ngpu=1 # number of gpus ("0" uses cpu, otherwise use gpu)
nj=16 # numebr of parallel jobs
nj=16 # number of parallel jobs
dumpdir=dump # directory to dump full features
verbose=1 # verbose option (if set > 0, get more log)
N=0 # number of minibatches to be used (mainly for debugging). "0" uses all minibatches.
2 changes: 1 addition & 1 deletion egs/blizzard17/tts1/run.sh
@@ -11,7 +11,7 @@ backend=pytorch
stage=-1
stop_stage=100
ngpu=1 # number of gpus ("0" uses cpu, otherwise use gpu)
nj=32 # numebr of parallel jobs
nj=32 # number of parallel jobs
dumpdir=dump # directory to dump full features
verbose=1 # verbose option (if set > 0, get more log)
N=0 # number of minibatches to be used (mainly for debugging). "0" uses all minibatches.
2 changes: 1 addition & 1 deletion egs/csmsc/tts1/run.sh
@@ -11,7 +11,7 @@ backend=pytorch
stage=-1
stop_stage=100
ngpu=1 # number of gpus ("0" uses cpu, otherwise use gpu)
nj=32 # numebr of parallel jobs
nj=32 # number of parallel jobs
dumpdir=dump # directory to dump full features
verbose=1 # verbose option (if set > 0, get more log)
N=0 # number of minibatches to be used (mainly for debugging). "0" uses all minibatches.
2 changes: 1 addition & 1 deletion egs/jnas/tts1/run.sh
@@ -11,7 +11,7 @@ backend=pytorch
stage=-1
stop_stage=100
ngpu=1 # number of gpu in training
nj=32 # numebr of parallel jobs
nj=32 # number of parallel jobs
dumpdir=dump # directory to dump full features
verbose=1 # verbose option (if set > 1, get more log)
seed=1 # random seed number
2 changes: 1 addition & 1 deletion egs/jsut/tts1/run.sh
@@ -11,7 +11,7 @@ backend=pytorch
stage=-1
stop_stage=100
ngpu=1 # number of gpus ("0" uses cpu, otherwise use gpu)
nj=32 # numebr of parallel jobs
nj=32 # number of parallel jobs
dumpdir=dump # directory to dump full features
verbose=1 # verbose option (if set > 0, get more log)
N=0 # number of minibatches to be used (mainly for debugging). "0" uses all minibatches.
2 changes: 1 addition & 1 deletion egs/jvs/tts1/run.sh
@@ -11,7 +11,7 @@ backend=pytorch
stage=-1
stop_stage=100
ngpu=1 # number of gpus ("0" uses cpu, otherwise use gpu)
nj=8 # numebr of parallel jobs
nj=8 # number of parallel jobs
dumpdir=dump # directory to dump full features
verbose=1 # verbose option (if set > 0, get more log)
N=0 # number of minibatches to be used (mainly for debugging). "0" uses all minibatches.
2 changes: 1 addition & 1 deletion egs/libritts/tts1/run.sh
@@ -11,7 +11,7 @@ backend=pytorch
stage=-1
stop_stage=100
ngpu=1 # number of gpu in training
nj=64 # numebr of parallel jobs
nj=64 # number of parallel jobs
dumpdir=dump # directory to dump full features
verbose=1 # verbose option (if set > 1, get more log)
seed=1 # random seed number
2 changes: 1 addition & 1 deletion egs/ljspeech/asr1/run.sh
@@ -12,7 +12,7 @@ backend=pytorch
stage=-1
stop_stage=100
ngpu=1 # number of gpus ("0" uses cpu, otherwise use gpu)
nj=32 # numebr of parallel jobs
nj=32 # number of parallel jobs
dumpdir=dump # directory to dump full features
verbose=1 # verbose option (if set > 0, get more log)
N=0 # number of minibatches to be used (mainly for debugging). "0" uses all minibatches.
2 changes: 1 addition & 1 deletion egs/ljspeech/tts1/run.sh
@@ -12,7 +12,7 @@ backend=pytorch
stage=-1
stop_stage=100
ngpu=1 # number of gpus ("0" uses cpu, otherwise use gpu)
nj=32 # numebr of parallel jobs
nj=32 # number of parallel jobs
dumpdir=dump # directory to dump full features
verbose=1 # verbose option (if set > 0, get more log)
N=0 # number of minibatches to be used (mainly for debugging). "0" uses all minibatches.
2 changes: 1 addition & 1 deletion egs/ljspeech/tts2/run.sh
@@ -11,7 +11,7 @@ backend=pytorch
stage=-1
stop_stage=100
ngpu=1 # number of gpus ("0" uses cpu, otherwise use gpu)
nj=32 # numebr of parallel jobs
nj=32 # number of parallel jobs
dumpdir=dump # directory to dump full features
verbose=1 # verbose option (if set > 0, get more log)
N=0 # number of minibatches to be used (mainly for debugging). "0" uses all minibatches.
2 changes: 1 addition & 1 deletion egs/m_ailabs/tts1/run.sh
@@ -11,7 +11,7 @@ backend=pytorch
stage=-1
stop_stage=100
ngpu=1 # number of gpus ("0" uses cpu, otherwise use gpu)
nj=32 # numebr of parallel jobs
nj=32 # number of parallel jobs
dumpdir=dump # directory to dump full features
verbose=1 # verbose option (if set > 0, get more log)
N=0 # number of minibatches to be used (mainly for debugging). "0" uses all minibatches.
4 changes: 2 additions & 2 deletions egs/mini_an4/asr1/conf/lm.yaml
@@ -7,6 +7,6 @@ schedulers: lr=noam # optimizer lr scheduler
lr-noam-warmup: 2
sortagrad: 0 # Feed samples from shortest to longest ; -1: enabled for all epochs, 0: disabled, other: enabled for 'other' epochs
batchsize: 2 # 1024 for character LMs
epoch: 3 # number of epochs
patience: 2
epoch: 2 # number of epochs
patience: 1
maxlen: 40 # 150 for character LMs
8 changes: 4 additions & 4 deletions egs/mini_an4/asr1/conf/lm_transformer.yaml
@@ -1,9 +1,9 @@
# network architecture
model-module: transformer
att-unit: 16
att-unit: 8
embed-unit: 2
head: 8
layer: 2
head: 2
layer: 1
pos-enc: none
unit: 10

@@ -15,7 +15,7 @@ maxlen: 40
opt: adam
schedulers: lr=cosine
dropout-rate: 0.0
epoch: 3
epoch: 2
gradclip: 1.0
lr: 1e-4
lr-cosine-total: 3
8 changes: 4 additions & 4 deletions egs/mini_an4/asr1/conf/train.yaml
@@ -14,15 +14,15 @@ sampling-probability: 0.0
# encoder related
etype: blstmp # encoder architecture type
elayers: 2
eunits: 32
eprojs: 32
eunits: 16
eprojs: 16
subsample: "1_2_2_1_1" # skip every n frame from input to nth layers
# decoder related
dlayers: 1
dunits: 30
dunits: 16
# attention related
atype: location
adim: 32
adim: 16
aconv-chans: 4
aconv-filts: 3

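As a hedged illustration of how a config like this is consumed (the option name is assumed to match the --train_config invocations shown in ci/test_integration.sh above, not confirmed for this recipe):

cd egs/mini_an4/asr1
./run.sh --train_config conf/train.yaml
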
14 changes: 7 additions & 7 deletions egs/mini_an4/asr1/conf/train_conformer.yaml
@@ -1,13 +1,13 @@
# network architecture
# encoder related
elayers: 2
eunits: 32
elayers: 1
eunits: 4
# decoder related
dlayers: 2
dunits: 32
dlayers: 1
dunits: 4
# attention related
adim: 16
aheads: 4
adim: 4
aheads: 2

# hybrid CTC/attention
mtlalpha: 0.3
@@ -26,7 +26,7 @@ opt: noam
accum-grad: 2
grad-clip: 5
patience: 0
epochs: 3
epochs: 1
dropout-rate: 0.1

# transformer specific setting
14 changes: 7 additions & 7 deletions egs/mini_an4/asr1/conf/train_conformer_pure_ctc.yaml
@@ -1,13 +1,13 @@
# network architecture
# encoder related
elayers: 2
eunits: 32
elayers: 1
eunits: 4
# decoder related
dlayers: 2
dunits: 32
dlayers: 1
dunits: 4
# attention related
adim: 16
aheads: 4
adim: 4
aheads: 2

# hybrid CTC/attention
mtlalpha: 1.0
@@ -26,7 +26,7 @@ opt: noam
accum-grad: 2
grad-clip: 5
patience: 0
epochs: 3
epochs: 1
dropout-rate: 0.1

# transformer specific setting
14 changes: 7 additions & 7 deletions egs/mini_an4/asr1/conf/train_mulenc2.yaml
@@ -5,8 +5,8 @@ maxlen-out: 150 # if output length > maxlen_out, batchsize is automatically reduced
# optimization related
sortagrad: 0 # Feed samples from shortest to longest ; -1: enabled for all epochs, 0: disabled, other: enabled for 'other' epochs
opt: adadelta
epochs: 3
patience: 2
epochs: 1
patience: 0

# scheduled sampling option
sampling-probability: 0.0
@@ -17,23 +17,23 @@ model-module: "espnet.nets.pytorch_backend.e2e_asr_mulenc:E2E"
# encoder related
etype: [ blstmp, blstmp ] # encoder architecture type
elayers: [ 2, 2 ]
eunits: [ 32, 32 ]
eprojs: 32
eunits: [ 4, 4 ]
eprojs: 4
subsample: [ "1_2_2_1_1", "1_2_2_1_1" ] # skip every n frame from input to nth layers
# decoder related
dlayers: 1
dunits: 30
dunits: 4
# attention related
atype: [ location, location ]
adim: [ 32, 32 ]
adim: [ 4, 4 ]
awin: [ 5, 5 ]
aheads: [ 4, 4 ]
aconv-chans: [ 4, 4 ]
aconv-filts: [ 3, 3 ]

# hierarchical attention network (han)
han-type: add
han-dim: 32
han-dim: 4
han-win: 5
han-heads: 4
han-conv-chans: 4
14 changes: 7 additions & 7 deletions egs/mini_an4/asr1/conf/train_multispkr.yaml
@@ -6,25 +6,25 @@ maxlen-out: 150 # if output length > maxlen_out, batchsize is automatically reduced
# optimization related
sortagrad: 0 # Feed samples from shortest to longest ; -1: enabled for all epochs, 0: disabled, other: enabled for 'other' epochs
opt: adadelta
epochs: 3
patience: 2
epochs: 1
patience: 0

# scheduled sampling option
sampling-probability: 0.0

# encoder related
etype: vggblstmp # encoder architecture type
elayers-sd: 1
elayers: 2
eunits: 32
eprojs: 32
elayers: 1
eunits: 4
eprojs: 4
subsample: "1_2_2_1_1" # skip every n frame from input to nth layers
# decoder related
dlayers: 1
dunits: 30
dunits: 4
# attention related
atype: location
adim: 32
adim: 4
aconv-chans: 4
aconv-filts: 3
