git-chores: Ignore downloaded model files for the demo. (alumae#81)

* prepare-models: Save model files under the `models` directory. Keeping all the downloaded files together in one directory makes it easy to ignore them in the .gitignore, instead of ignoring each type of file (e.g. *.txt, *.int, etc.). NOTE: The files under the `ivector_extractor` and `conf` directories are not saved under `models`, as that would require changing the paths in those config files. These config files are not under source control and are downloaded from an external source.
* git-chores: Ignore files downloaded using prepare-models.sh.
hcfeng201 · Jul 18, 2019 · a3f488b · a3f488b
1 parent 617e43e
commit a3f488b
Show file tree

Hide file tree

Showing 5 changed files with 24 additions and 15 deletions.
diff --git a/.gitignore b/.gitignore
@@ -6,4 +6,9 @@ src/kaldimarshal.*
 # editor files
 .lvimrc
 tags
-*~
+*~
+
+# Downloaded model files for the demo.
+demo/models/
+conf/
+ivector_extractor/
diff --git a/README.md b/README.md
@@ -142,7 +142,7 @@ The output should list all plugin properties with their default values:
                             Boolean. Default: false
       model               : Filename of the acoustic model
                             flags: readable, writable
-                            String. Default: "final.mdl"
+                            String. Default: "models/final.mdl"
     [...]
       max-nnet-batch-size : Maximum batch size we use in neural-network decodable object, in cases where we are not constrained by currently available frames (this will rarely make a difference)
                             flags: readable, writable

diff --git a/demo/gui-demo.py b/demo/gui-demo.py
@@ -56,13 +56,13 @@ def init_gst(self):
         self.fakesink = Gst.ElementFactory.make("fakesink", "fakesink")
 
         if self.asr:
-          model_file = "final.mdl"
+          model_file = "models/final.mdl"
           if not os.path.isfile(model_file):
               print >> sys.stderr, "Models not downloaded? Run prepare-models.sh first!"
               sys.exit(1)
-          self.asr.set_property("fst", "HCLG.fst")
-          self.asr.set_property("model", "final.mdl")
-          self.asr.set_property("word-syms", "words.txt")
+          self.asr.set_property("fst", "models/HCLG.fst")
+          self.asr.set_property("model", model_file)
+          self.asr.set_property("word-syms", "models/words.txt")
           self.asr.set_property("feature-type", "mfcc")
           self.asr.set_property("mfcc-config", "conf/mfcc.conf")
           self.asr.set_property("ivector-extraction-config", "conf/ivector_extractor.fixed.conf")

diff --git a/demo/prepare-models.sh b/demo/prepare-models.sh
@@ -5,14 +5,18 @@ BASE_URL=http://kaldi-asr.org/downloads/build/2/sandbox/online/egs/fisher_englis
 MODEL=exp/nnet2_online/nnet_a_gpu_online
 GRAPH=exp/tri5a
 
-wget -N $BASE_URL/$MODEL/final.mdl || exit 1
 (mkdir -p ivector_extractor; cd ivector_extractor; wget -N $BASE_URL/$MODEL/ivector_extractor/{final.ie,final.dubm,final.mat,global_cmvn.stats}) || exit 1
 (mkdir -p conf; cd conf; wget -N $BASE_URL/$MODEL/conf/{ivector_extractor.conf,online_nnet2_decoding.conf,mfcc.conf,online_cmvn.conf,splice.conf}) || exit 1
 
+cat conf/ivector_extractor.conf | perl -npe 's/=.*nnet_a_gpu_online\//=/' > conf/ivector_extractor.fixed.conf
+
+mkdir -p models
+cd models
+
+wget -N $BASE_URL/$MODEL/final.mdl || exit 1
 wget -N $BASE_URL/$GRAPH/graph/HCLG.fst || exit 1
 wget -N $BASE_URL/$GRAPH/graph/words.txt || exit 1
 wget -N $BASE_URL/$GRAPH/graph/phones.txt || exit 1
 wget -N $BASE_URL/$GRAPH/graph/phones/word_boundary.int || exit 1
 
-
-cat conf/ivector_extractor.conf | perl -npe 's/=.*nnet_a_gpu_online\//=/' > conf/ivector_extractor.fixed.conf
+cd ..
diff --git a/demo/transcribe-audio.sh b/demo/transcribe-audio.sh
@@ -2,7 +2,7 @@
 
 if [ $# != 1 ]; then
     echo "Usage: transcribe-audio.sh <audio>"
-    echo "e.g.: transcribe-audio.sh dr_strangelove.mp3" 
+    echo "e.g.: transcribe-audio.sh dr_strangelove.mp3"
     exit 1;
 fi
 
@@ -18,11 +18,11 @@ audio=$1
 GST_PLUGIN_PATH=../src gst-launch-1.0 --gst-debug="" -q filesrc location=$audio ! decodebin ! audioconvert ! audioresample ! \
 kaldinnet2onlinedecoder \
   use-threaded-decoder=true \
-  model=final.mdl \
-  fst=HCLG.fst \
-  word-syms=words.txt \
-  phone-syms=phones.txt \
-  word-boundary-file=word_boundary.int \
+  model=models/final.mdl \
+  fst=models/HCLG.fst \
+  word-syms=models/words.txt \
+  phone-syms=models/phones.txt \
+  word-boundary-file=models/word_boundary.int \
   num-nbest=3 \
   num-phone-alignment=3 \
   do-phone-alignment=true \