Feature-extraction settings are now read from a YAML configuration file

Anwarvic · May 5, 2019 · 6f08f7f · 6f08f7f
1 parent 7d02811
commit 6f08f7f
Showing 1 changed file with 58 additions and 53 deletions.
diff --git a/extract_features.py b/extract_features.py
@@ -1,40 +1,37 @@
 import os
 import sidekit
 import numpy as np
-from multiprocessing import cpu_count
 import logging
 logging.basicConfig(level=logging.INFO)
+from multiprocessing import cpu_count
+from utils import parse_yaml
 
 class FeaturesExtractor():
 
-    def __init__(self):
-        # The parent directory of the project
-        self.BASE_DIR = "/media/anwar/E/Voice_Biometrics/SIDEKIT-1.3/py3env"
+    def __init__(self, conf_path):
+        #parse the YAML configuration file
+        self.conf = parse_yaml(conf_path)
+        self.audio_dir = os.path.join(self.conf['outpath'], "audio") #input dir
+        self.feat_dir = os.path.join(self.conf['outpath'], "feat")
         # Number of parallel threads
         self.NUM_THREADS = cpu_count()
-        # The features need to be extracted
-        #  -> log-energy: energy
-        #  -> "cep" for cepstral coefficients, its size is ceps_number which is CEPS_NUMBER
-        #  -> "fb" for filter-banks, its size is FILTER_BANK_ISE
-        #  -> "energy" for the log-energy, its size is 1
-        #  -> "bnf"
-        self.FEAUTRES = ["vad", "energy", "cep", "fb"]
-
-        #filter bank can either be "log" and "lin" for linear
-        self.FILTER_BANK = "log"
-        self.FILTER_BANK_SIZE = 24
-        self.LOWER_FREQUENCY = 300
-        self.HIGHER_FREQUENCY = 3400
-        #  -> vad: type of voice activity detection algorithm to use.       
-        self.VAD = "snr" #can be either "energy", "snr", "percentil" or "lbl".
-        self.SNR_RATIO = 40 if self.VAD == "snr" else None
+
+        self.FEAUTRES = self.conf['features']
+        self.FILTER_BANK = self.conf['filter_bank']
+        self.FILTER_BANK_SIZE = self.conf['filter_bank_size']
+        self.LOWER_FREQUENCY = self.conf['lower_frequency']
+        self.HIGHER_FREQUENCY = self.conf['higher_frequency']
+        self.VAD = self.conf['vad']
+        self.SNR_RATIO = self.conf['snr_ratio'] if self.VAD=="snr" else None
         # cepstral coefficients
-        self.WINDOW_SIZE = 0.025
-        self.WINDOW_SHIFT = 0.01
-        self.CEPS_NUMBER = 19
+        self.WINDOW_SIZE = self.conf['window_size']
+        self.WINDOW_SHIFT = self.conf['window_shift']
+        self.CEPS_NUMBER = self.conf['cepstral_coefficients']
+        # reset unnecessary ones based on given configuration
+        self.review_member_variables()
 
 
-    def reviewMemberVariables(self):
+    def review_member_variables(self):
         """
         This method is used to modify the values of some of the member
         variables based on the features inserted.
@@ -58,41 +55,47 @@ def reviewMemberVariables(self):
             self.SNR_RATIO = None
 
 
-    def extractFeatures(self, group):
+    def extract_features(self, group):
         """
         This function computes the acoustic parameters of audio files inside
-        "self.BASE_DIR/group":
-        for a list of audio files and save them to disk in a HDF5 format
+        "self.audio_dir/group" and saves them to disk in HDF5 format.
+        Args:
+            group (String): the name of the group whose features are to be
+                extracted. It must be either 'enroll' or 'test'.
         """
-        in_files = os.listdir(os.path.join(self.BASE_DIR, "audio", group))
-        feat_dir = os.path.join(self.BASE_DIR, "feat", group)
+        assert group in ["enroll", "test"],\
+            "Invalid group name!! Choose either 'enroll', 'test'"
+        in_files = os.listdir(os.path.join(self.audio_dir, group))
+        feat_dir = os.path.join(self.feat_dir, group)
         # Feature extraction
         # lower_frequency: lower frequency (in Hertz) of the filter bank
         # higher_frequency: higher frequency of the filter bank
-        # filter_bank: type of fiter scale to use, can be "lin" or "log" (for linear of log-scale)
+        # filter_bank: type of filter scale to use, can be "lin" or "log"
+        #              (for linear or log-scale)
         # filter_bank_size: number of filters banks
         # window_size: size of the sliding window to process (in seconds)
         # shift: time shift of the sliding window (in seconds)
         # ceps_number: number of cepstral coefficients to extract
         # snr: signal to noise ratio used for "snr" vad algorithm
         # vad: Type of voice activity detection algorithm to use.
         #      It Can be "energy", "snr", "percentil" or "lbl".
-        # save_param: list of strings that indicate which parameters to save. The strings can be:
-        # for bottle-neck features and "vad" for the frame selection labels.
-        # keep_all_features: boolean, if True, all frames are writen; if False, keep only frames according to the vad label
-        extractor = sidekit.FeaturesExtractor(audio_filename_structure=os.path.join(self.BASE_DIR, "audio", group, "{}"),
-                                              feature_filename_structure=os.path.join(feat_dir, "{}.h5"),
-                                              lower_frequency=self.LOWER_FREQUENCY,
-                                              higher_frequency=self.HIGHER_FREQUENCY,
-                                              filter_bank=self.FILTER_BANK,
-                                              filter_bank_size=self.FILTER_BANK_SIZE,
-                                              window_size=self.WINDOW_SIZE,
-                                              shift=self.WINDOW_SHIFT,
-                                              ceps_number=self.CEPS_NUMBER,
-                                              vad=self.VAD,
-                                              snr=self.SNR_RATIO,
-                                              save_param=self.FEAUTRES,
-                                              keep_all_features=True)
+        # save_param: list of strings that indicate which parameters to save. 
+        # keep_all_features: boolean, if True, all frames are written; if False,
+        #                    keep only frames according to the vad label
+        extractor = sidekit.FeaturesExtractor(
+            audio_filename_structure=os.path.join(self.audio_dir, group, "{}"),
+            feature_filename_structure=os.path.join(feat_dir, "{}.h5"),
+            lower_frequency=self.LOWER_FREQUENCY,
+            higher_frequency=self.HIGHER_FREQUENCY,
+            filter_bank=self.FILTER_BANK,
+            filter_bank_size=self.FILTER_BANK_SIZE,
+            window_size=self.WINDOW_SIZE,
+            shift=self.WINDOW_SHIFT,
+            ceps_number=self.CEPS_NUMBER,
+            vad=self.VAD,
+            snr=self.SNR_RATIO,
+            save_param=self.FEAUTRES,
+            keep_all_features=True)
 
         # Prepare file lists
         # show_list: list of IDs of the show to process
@@ -115,10 +118,11 @@ def extractFeatures(self, group):
         logging.info("Number of skipped files: "+str(len(SKIPPED)))
         for show in SKIPPED:
             logging.debug(show)
-        #BUG: The following lines do the exact same thing
-        # as the few ones above, but with using multi-processing where
-        # num_thread: is the number of parallel process to run
-        # This method freezes after sometime, so you can try it
+        # BUG: The following lines do exactly the same thing as the for-loop
+        # above, but use multi-processing, where 'num_thread' is the number
+        # of parallel processes to run.
+        # This method is buggy and freezes after some time. I don't
+        # recommend using it, but you can give it a try:
         # extractor.save_list(show_list=show_list,
         #                     channel_list=channel_list,
         #                     num_thread=self.NUM_THREADS)
@@ -128,6 +132,7 @@ def extractFeatures(self, group):
 
 
 if __name__ == "__main__":
-    ex = FeaturesExtractor()
-    ex.extractFeatures("enroll")
-    ex.extractFeatures("test")
+    conf_filename = "py3env/conf.yaml"
+    ex = FeaturesExtractor(conf_filename)
+    ex.extract_features("enroll")
+    ex.extract_features("test")