- WSL (Ubuntu 20.04 LTS)
- Kaldi
- Python 3
- AISHELL (170 h, 400 speakers)
- Configure the Kaldi path in `path.sh`:
export KALDI_ROOT=xxx/kaldi # kaldi root
export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/tools/sph2pipe_v2.5:$PWD:$PATH
[ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1
. $KALDI_ROOT/tools/config/common_path.sh
export LC_ALL=C
- Configure the training command in `cmd.sh`:
export train_cmd="run.pl -mem 4G"
- Create symbolic links to the `sid`, `steps`, and `utils` directories of the Kaldi `sre08/v1` recipe:
ln -snf $KALDI_ROOT/egs/sre08/v1/sid sid
ln -snf $KALDI_ROOT/egs/sre08/v1/steps steps
ln -snf $KALDI_ROOT/egs/sre08/v1/utils utils
. ./stage0_prepare_train_data.sh
. ./stage1_make_mfcc_vad.sh
. ./stage2_filter_feature.sh
# Note: the x-vector model must be initialized before training.
. ./path.sh
nnet3-init ./exp/xvector_nnet_la/nnet.config ./exp/xvector_nnet_la/0.raw
. ./stage3_train_x_vector.sh
. ./stage4_train_plda.sh
. ./stage5_split_enroll_eval.sh
. ./stage6_calc_eer_result.sh
The following datasets can be used for data augmentation: