Skip to content

Commit

Permalink
Feature-extraction settings are now read from a YAML configuration file
Browse files Browse the repository at this point in the history
  • Loading branch information
Anwarvic committed May 5, 2019
1 parent 7d02811 commit 6f08f7f
Showing 1 changed file with 58 additions and 53 deletions.
111 changes: 58 additions & 53 deletions extract_features.py
Original file line number Diff line number Diff line change
@@ -1,40 +1,37 @@
import os
import sidekit
import numpy as np
from multiprocessing import cpu_count
import logging
logging.basicConfig(level=logging.INFO)
from multiprocessing import cpu_count
from utils import parse_yaml

class FeaturesExtractor():

def __init__(self):
# The parent directory of the project
self.BASE_DIR = "/media/anwar/E/Voice_Biometrics/SIDEKIT-1.3/py3env"
def __init__(self, conf_path):
#parse the YAML configuration file
self.conf = parse_yaml(conf_path)
self.audio_dir = os.path.join(self.conf['outpath'], "audio") #input dir
self.feat_dir = os.path.join(self.conf['outpath'], "feat")
# Number of parallel threads
self.NUM_THREADS = cpu_count()
# The features need to be extracted
# -> log-energy: energy
# -> "cep" for cepstral coefficients, its size is ceps_number which is CEPS_NUMBER
# -> "fb" for filter-banks, its size is FILTER_BANK_ISE
# -> "energy" for the log-energy, its size is 1
# -> "bnf"
self.FEAUTRES = ["vad", "energy", "cep", "fb"]

#filter bank can either be "log" and "lin" for linear
self.FILTER_BANK = "log"
self.FILTER_BANK_SIZE = 24
self.LOWER_FREQUENCY = 300
self.HIGHER_FREQUENCY = 3400
# -> vad: type of voice activity detection algorithm to use.
self.VAD = "snr" #can be either "energy", "snr", "percentil" or "lbl".
self.SNR_RATIO = 40 if self.VAD == "snr" else None

self.FEAUTRES = self.conf['features']
self.FILTER_BANK = self.conf['filter_bank']
self.FILTER_BANK_SIZE = self.conf['filter_bank_size']
self.LOWER_FREQUENCY = self.conf['lower_frequency']
self.HIGHER_FREQUENCY = self.conf['higher_frequency']
self.VAD = self.conf['vad']
self.SNR_RATIO = self.conf['snr_ratio'] if self.VAD=="snr" else None
# cepstral coefficients
self.WINDOW_SIZE = 0.025
self.WINDOW_SHIFT = 0.01
self.CEPS_NUMBER = 19
self.WINDOW_SIZE = self.conf['window_size']
self.WINDOW_SHIFT = self.conf['window_shift']
self.CEPS_NUMBER = self.conf['cepstral_coefficients']
# reset unnecessary ones based on given configuration
self.review_member_variables()


def reviewMemberVariables(self):
def review_member_variables(self):
"""
This method is used to modify the values of some of the member
variables based on the features inserted.
Expand All @@ -58,41 +55,47 @@ def reviewMemberVariables(self):
self.SNR_RATIO = None


def extractFeatures(self, group):
def extract_features(self, group):
"""
This function computes the acoustic parameters of audio files inside
"self.BASE_DIR/group":
for a list of audio files and save them to disk in a HDF5 format
"self.audio_dir/group" save them to disk in a HDF5 format
Args:
group (String): the name of the group whose features we want to
extract. It must be either 'enroll' or 'test'.
"""
in_files = os.listdir(os.path.join(self.BASE_DIR, "audio", group))
feat_dir = os.path.join(self.BASE_DIR, "feat", group)
assert group in ["enroll", "test"],\
"Invalid group name!! Choose either 'enroll', 'test'"
in_files = os.listdir(os.path.join(self.audio_dir, group))
feat_dir = os.path.join(self.feat_dir, group)
# Feature extraction
# lower_frequency: lower frequency (in Hertz) of the filter bank
# higher_frequency: higher frequency of the filter bank
# filter_bank: type of fiter scale to use, can be "lin" or "log" (for linear of log-scale)
# filter_bank: type of filter scale to use, can be "lin" or "log"
# (for linear or log-scale)
# filter_bank_size: number of filters banks
# window_size: size of the sliding window to process (in seconds)
# shift: time shift of the sliding window (in seconds)
# ceps_number: number of cepstral coefficients to extract
# snr: signal to noise ratio used for "snr" vad algorithm
# vad: Type of voice activity detection algorithm to use.
# It Can be "energy", "snr", "percentil" or "lbl".
# save_param: list of strings that indicate which parameters to save. The strings can be:
# for bottle-neck features and "vad" for the frame selection labels.
# keep_all_features: boolean, if True, all frames are writen; if False, keep only frames according to the vad label
extractor = sidekit.FeaturesExtractor(audio_filename_structure=os.path.join(self.BASE_DIR, "audio", group, "{}"),
feature_filename_structure=os.path.join(feat_dir, "{}.h5"),
lower_frequency=self.LOWER_FREQUENCY,
higher_frequency=self.HIGHER_FREQUENCY,
filter_bank=self.FILTER_BANK,
filter_bank_size=self.FILTER_BANK_SIZE,
window_size=self.WINDOW_SIZE,
shift=self.WINDOW_SHIFT,
ceps_number=self.CEPS_NUMBER,
vad=self.VAD,
snr=self.SNR_RATIO,
save_param=self.FEAUTRES,
keep_all_features=True)
# save_param: list of strings that indicate which parameters to save.
# keep_all_features: boolean, if True, all frames are written; if False,
# keep only frames according to the vad label
extractor = sidekit.FeaturesExtractor(
audio_filename_structure=os.path.join(self.audio_dir, group, "{}"),
feature_filename_structure=os.path.join(feat_dir, "{}.h5"),
lower_frequency=self.LOWER_FREQUENCY,
higher_frequency=self.HIGHER_FREQUENCY,
filter_bank=self.FILTER_BANK,
filter_bank_size=self.FILTER_BANK_SIZE,
window_size=self.WINDOW_SIZE,
shift=self.WINDOW_SHIFT,
ceps_number=self.CEPS_NUMBER,
vad=self.VAD,
snr=self.SNR_RATIO,
save_param=self.FEAUTRES,
keep_all_features=True)

# Prepare file lists
# show_list: list of IDs of the show to process
Expand All @@ -115,10 +118,11 @@ def extractFeatures(self, group):
logging.info("Number of skipped files: "+str(len(SKIPPED)))
for show in SKIPPED:
logging.debug(show)
#BUG: The following lines do the exact same thing
# as the few ones above, but with using multi-processing where
# num_thread: is the number of parallel process to run
# This method freezes after sometime, so you can try it
# BUG: The following lines do exactly the same thing as the for-loop
# above, but using multiprocessing, where 'num_thread' is the number of
# parallel processes to run.
# This method is buggy and freezes after some time. I don't recommend
# using it, but you can give it a try:
# extractor.save_list(show_list=show_list,
# channel_list=channel_list,
# num_thread=self.NUM_THREADS)
Expand All @@ -128,6 +132,7 @@ def extractFeatures(self, group):


if __name__ == "__main__":
ex = FeaturesExtractor()
ex.extractFeatures("enroll")
ex.extractFeatures("test")
conf_filename = "py3env/conf.yaml"
ex = FeaturesExtractor(conf_filename)
ex.extract_features("enroll")
ex.extract_features("test")

0 comments on commit 6f08f7f

Please sign in to comment.