Skip to content

Commit

Permalink
[egs] Fixes to the aishell (Mandarin) recipe (kaldi-asr#1770)
Browse files (browse the repository at this point in the history)
  • Loading branch information
naxingyu authored and danpovey committed Jul 19, 2017
1 parent aedc2fe commit c794d55
Show file tree
Hide file tree
Showing 3 changed files with 7 additions and 9 deletions.
8 changes: 3 additions & 5 deletions egs/aishell/s5/local/aishell_data_prep.sh
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ fi

# find wav audio file for train, dev and test resp.
find $aishell_audio_dir -iname "*.wav" > $tmp_dir/wav.flist
- n=`wc -l $tmp_dir/wav.flist`
+ n=`cat $tmp_dir/wav.flist | wc -l`
[ $n -ne 141925 ] && \
echo Warning: expected 141925 data data files, found $n

Expand All @@ -45,10 +45,8 @@ rm -r $tmp_dir
# Transcriptions preparation
for dir in $train_dir $dev_dir $test_dir; do
echo Preparing $dir transcriptions
- sed -e 's/\.wav//' $dir/wav.flist | awk -F '/' '{print $NF}' |\
- sort > $dir/utt.list
- sed -e 's/\.wav//' $dir/wav.flist | awk -F '/' '{i=NF-1;printf("%s %s\n",$NF,$i)}' |\
- sort > $dir/utt2spk_all
+ sed -e 's/\.wav//' $dir/wav.flist | awk -F '/' '{print $NF}' > $dir/utt.list
+ sed -e 's/\.wav//' $dir/wav.flist | awk -F '/' '{i=NF-1;printf("%s %s\n",$NF,$i)}' > $dir/utt2spk_all
paste -d' ' $dir/utt.list $dir/wav.flist > $dir/wav.scp_all
utils/filter_scp.pl -f 1 $dir/utt.list $aishell_text > $dir/transcripts.txt
awk '{print $1}' $dir/transcripts.txt > $dir/utt.list
Expand Down
4 changes: 2 additions & 2 deletions egs/aishell/s5/local/download_and_untar.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ if [ $# -ne 3 ]; then
echo "Usage: $0 [--remove-archive] <data-base> <url-base> <corpus-part>"
echo "e.g.: $0 /export/a05/xna/data www.openslr.org/resources/33 data_aishell"
echo "With --remove-archive it will remove the archive after successfully un-tarring it."
- echo "<corpus-part> can be one of: data_aishell, resource."
+ echo "<corpus-part> can be one of: data_aishell, resource_aishell."
fi

data=$1
Expand All @@ -28,7 +28,7 @@ if [ ! -d "$data" ]; then
fi

part_ok=false
- list="data_aishell resource"
+ list="data_aishell resource_aishell"
for x in $list; do
if [ "$part" == $x ]; then part_ok=true; fi
done
Expand Down
4 changes: 2 additions & 2 deletions egs/aishell/s5/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,10 @@
data=/export/a05/xna/data
data_url=www.openslr.org/resources/33

- . cmd.sh
+ . ./cmd.sh

local/download_and_untar.sh $data $data_url data_aishell || exit 1;
- local/download_and_untar.sh $data $data_url resource || exit 1;
+ local/download_and_untar.sh $data $data_url resource_aishell || exit 1;

# Lexicon Preparation,
local/aishell_prepare_dict.sh $data/resource_aishell || exit 1;
Expand Down

0 comments on commit c794d55

Please sign in to comment.