edited a typo in i-vector script
Anwarvic committed May 12, 2019
1 parent 9df8e88 commit b529043
44 changes: 22 additions & 22 deletions i-vector.py
@@ -20,7 +20,7 @@ def __init__(self, conf_path):
         super().__init__(conf_path)
         # Set parameters of your system
         self.conf_path = conf_path
-        self.NUM_GUASSIANS = self.conf['num_gaussians']
+        self.NUM_GAUSSIANS = self.conf['num_gaussians']
         self.BATCH_SIZE = self.conf['batch_size']
         self.TV_RANK = self.conf['tv_rank']
         self.TV_ITERATIONS = self.conf['tv_iterations']
@@ -45,25 +45,25 @@ def __create_stats(self):
             raise RuntimeError("Error merging tv_idmap & plda_idmap")

         # Check UBM model
-        model_name = "ubm_{}.h5".format(self.NUM_GUASSIANS)
-        model_path = os.path.join(self.BASE_DIR, "ubm", model_name)
-        if not os.path.exists(model_path):
+        ubm_name = "ubm_{}.h5".format(self.NUM_GAUSSIANS)
+        ubm_path = os.path.join(self.BASE_DIR, "ubm", ubm_name)
+        if not os.path.exists(ubm_path):
             #if UBM model does not exist, train one
-            logging.info("Training UBM-{} model".format(self.NUM_GUASSIANS))
+            logging.info("Training UBM-{} model".format(self.NUM_GAUSSIANS))
             ubm = UBM(self.conf_path)
             ubm.train()
         #load trained UBM model
-        logging.info("Loading trained UBM-{} model".format(self.NUM_GUASSIANS))
+        logging.info("Loading trained UBM-{} model".format(self.NUM_GAUSSIANS))
         ubm = sidekit.Mixture()
-        ubm.read(model_path)
+        ubm.read(ubm_path)
         back_stat = sidekit.StatServer( statserver_file_name=back_idmap,
                                         ubm=ubm
                                       )
         # Create Feature Server
         fs = self.createFeatureServer()

         # Jointly compute the sufficient statistics of TV and (if enabled) PLDA data
-        back_filename = 'back_stat_{}.h5'.format(self.NUM_GUASSIANS)
+        back_filename = 'back_stat_{}.h5'.format(self.NUM_GAUSSIANS)
         if not os.path.isfile(os.path.join(self.BASE_DIR, "stat", back_filename)):
             #BUG: don't use self.NUM_THREADS when assgining num_thread
             # as it's prune to race-conditioning
@@ -75,7 +75,7 @@ def __create_stats(self):
             back_stat.write(os.path.join(self.BASE_DIR, "stat", back_filename))

         # Load the sufficient statistics from TV training data
-        tv_filename = 'tv_stat_{}.h5'.format(self.NUM_GUASSIANS)
+        tv_filename = 'tv_stat_{}.h5'.format(self.NUM_GAUSSIANS)
         if not os.path.isfile(os.path.join(self.BASE_DIR, "stat", tv_filename)):
             tv_stat = sidekit.StatServer.read_subset(
                 os.path.join(self.BASE_DIR, "stat", back_filename),
@@ -85,7 +85,7 @@ def __create_stats(self):

         # Load sufficient statistics and extract i-vectors from PLDA training data
         if self.ENABLE_PLDA:
-            plda_filename = 'plda_stat_{}.h5'.format(self.NUM_GUASSIANS)
+            plda_filename = 'plda_stat_{}.h5'.format(self.NUM_GAUSSIANS)
             if not os.path.isfile(os.path.join(self.BASE_DIR, "stat", plda_filename)):
                 plda_stat = sidekit.StatServer.read_subset(
                     os.path.join(self.BASE_DIR, "stat", back_filename),
@@ -94,7 +94,7 @@ def __create_stats(self):
                 plda_stat.write(os.path.join(self.BASE_DIR, "stat", plda_filename))

         # Load sufficient statistics from test data
-        filename = 'test_stat_{}.h5'.format(self.NUM_GUASSIANS)
+        filename = 'test_stat_{}.h5'.format(self.NUM_GAUSSIANS)
         if not os.path.isfile(os.path.join(self.BASE_DIR, "stat", filename)):
             test_idmap = sidekit.IdMap.read(os.path.join(self.BASE_DIR, "task", "test_idmap.h5"))
             test_stat = sidekit.StatServer( statserver_file_name=test_idmap,
@@ -121,14 +121,14 @@ def train_tv(self):
         self.__create_stats()

         # Load UBM model
-        model_name = "ubm_{}.h5".format(self.NUM_GUASSIANS)
+        model_name = "ubm_{}.h5".format(self.NUM_GAUSSIANS)
         ubm = sidekit.Mixture()
         ubm.read(os.path.join(self.BASE_DIR, "ubm", model_name))

         # Train TV matrix using FactorAnalyser
-        filename = "tv_matrix_{}".format(self.NUM_GUASSIANS)
+        filename = "tv_matrix_{}".format(self.NUM_GAUSSIANS)
         outputPath = os.path.join(self.BASE_DIR, "ivector", filename)
-        tv_filename = 'tv_stat_{}.h5'.format(self.NUM_GUASSIANS)
+        tv_filename = 'tv_stat_{}.h5'.format(self.NUM_GAUSSIANS)
         fa = sidekit.FactorAnalyser()
         fa.total_variability_single(os.path.join(self.BASE_DIR, "stat", tv_filename),
                                     ubm,
@@ -145,7 +145,7 @@ def train_tv(self):
         # tv_sigma = fa.Sigma # Residual covariance matrix

         # Clear files produced at each iteration
-        filename_regex = "tv_matrix_{}_it-*.h5".format(self.NUM_GUASSIANS)
+        filename_regex = "tv_matrix_{}_it-*.h5".format(self.NUM_GAUSSIANS)
         lst = glob(os.path.join(self.BASE_DIR, "ivector", filename_regex))
         for f in lst:
             os.remove(f)
@@ -156,18 +156,18 @@ def evaluate(self, explain=True):
         This method is used to score our trained model.
         """
         # Load UBM model
-        model_name = "ubm_{}.h5".format(self.NUM_GUASSIANS)
+        model_name = "ubm_{}.h5".format(self.NUM_GAUSSIANS)
         ubm = sidekit.Mixture()
         ubm.read(os.path.join(self.BASE_DIR, "ubm", model_name))

         # Load TV matrix
-        filename = "tv_matrix_{}".format(self.NUM_GUASSIANS)
+        filename = "tv_matrix_{}".format(self.NUM_GAUSSIANS)
         outputPath = os.path.join(self.BASE_DIR, "ivector", filename)
         fa = sidekit.FactorAnalyser(outputPath+".h5")

         # Extract i-vectors from enrollment data
         logging.info("Extracting i-vectors from enrollment data")
-        filename = 'enroll_stat_{}.h5'.format(self.NUM_GUASSIANS)
+        filename = 'enroll_stat_{}.h5'.format(self.NUM_GAUSSIANS)
         enroll_stat = sidekit.StatServer.read(os.path.join(self.BASE_DIR, 'stat', filename))
         enroll_iv = fa.extract_ivectors_single( ubm=ubm,
                                                 stat_server=enroll_stat,
@@ -176,7 +176,7 @@ def evaluate(self, explain=True):

         # Extract i-vectors from test data
         logging.info("Extracting i-vectors from test data")
-        filename = 'test_stat_{}.h5'.format(self.NUM_GUASSIANS)
+        filename = 'test_stat_{}.h5'.format(self.NUM_GAUSSIANS)
         test_stat = sidekit.StatServer.read(os.path.join(self.BASE_DIR, 'stat', filename))
         test_iv = fa.extract_ivectors_single(ubm=ubm,
                                              stat_server=test_stat,
@@ -192,15 +192,15 @@ def evaluate(self, explain=True):
                                            wccn=None
                                            )
         # Write scores
-        filename = "ivector_scores_cos_{}.h5".format(self.NUM_GUASSIANS)
+        filename = "ivector_scores_cos_{}.h5".format(self.NUM_GAUSSIANS)
         scores_cos.write(os.path.join(self.BASE_DIR, "result", filename))

         # Explain the Analysis by writing more readible text file
         if explain:
             modelset = list(scores_cos.modelset)
             segset = list(scores_cos.segset)
             scores = np.array(scores_cos.scoremat)
-            filename = "ivector_scores_explained_{}.txt".format(iv.NUM_GUASSIANS)
+            filename = "ivector_scores_explained_{}.txt".format(iv.NUM_GAUSSIANS)
             fout = open(os.path.join(iv.BASE_DIR, "result", filename), "a")
             fout.truncate(0) #clear content
             for seg_idx, seg in enumerate(segset):
@@ -222,7 +222,7 @@ def evaluate(self, explain=True):
     def getAccuracy(self):
         import h5py
         # Load scores file
-        filename = "ivector_scores_cos_{}.h5".format(self.NUM_GUASSIANS)
+        filename = "ivector_scores_cos_{}.h5".format(self.NUM_GAUSSIANS)
         filepath = os.path.join(self.BASE_DIR, "result", filename)
         h5 = h5py.File(filepath, mode="r")
         modelset = list(h5["modelset"])
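A quick way to guard against this class of typo is to check the config value against the trained model on disk. Below is a minimal sketch, not part of the commit: base_dir and num_gaussians are placeholder values, and it assumes sidekit's Mixture.distrib_nb() returns the number of trained components.

import os
import sidekit

# Placeholder values; in i-vector.py these come from the conf file
num_gaussians = 16
base_dir = "exp"

# Load the UBM saved as ubm_<num_gaussians>.h5 and confirm it matches the config
ubm_path = os.path.join(base_dir, "ubm", "ubm_{}.h5".format(num_gaussians))
ubm = sidekit.Mixture()
ubm.read(ubm_path)
assert ubm.distrib_nb() == num_gaussians, "config and trained UBM disagree"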

3 comments on commit b529043

@IsmaelAh commented on b529043 May 24, 2019

Hello, I saw all your commits, so I will explain the problem exactly. I don't use your sidekit because it gives me an error at the beginning. This is the part of the code that reproduces the problem:

import sidekit
import os
import numpy as np

# Setting parameters
nbThread = 4 # change to desired number of threads
nbDistrib = 16 # change to desired final number of Gaussian distributions
base_dir = "C:/Users/new--laptop/Documents/Speaker-Recognition-master"
wav_dir = os.path.join(base_dir, "audio")  # this folder contains the speech files
feature_dir = os.path.join(base_dir, "features")  # in the beginning this folder is empty

# Prepare file lists 
all_files = os.listdir(wav_dir)
show_list = np.unique(np.hstack([all_files]))
channel_list = np.zeros_like(show_list, dtype = int)

# 1: Feature extraction
extractor = sidekit.FeaturesExtractor(audio_filename_structure=os.path.join(wav_dir, "{}"),
                                       feature_filename_structure=os.path.join(feature_dir, "{}.h5"),
                                       sampling_frequency=44100,
                                       lower_frequency=200,
                                       higher_frequency=3800,
                                       filter_bank="log",
                                       filter_bank_size=24,
                                       window_size=0.04,
                                       shift=0.01,
                                       ceps_number=20,
                                       vad="snr",
                                       snr=40,
                                       pre_emphasis=0.97,
                                       save_param=["vad", "energy", "cep", "fb"],
                                       keep_all_features=True)


extractor.save_list(show_list=show_list,
                     channel_list=channel_list,
                     num_thread=1)

@Anwarvic
When I run it, it gives me this error:

Traceback (most recent call last)
<ipython-input-4-a573ca944544> in <module>
     35 extractor.save_list(show_list=show_list,
     36                      channel_list=channel_list,
---> 37                      num_thread=1)

~\Machine learning\Speaker-Recognition-b5290431169b8c00dafda9db895c0d188d98401e\Speaker-Recognition-b5290431169b8c00dafda9db895c0d188d98401e\sidekit\sidekit_wrappers.py in wrapper(*args, **kwargs)
    227         else:
    228             logging.debug("No Parallel processing with this module")
--> 229             func(*args, **kwargs)
    230 
    231     return wrapper

~\Machine learning\Speaker-Recognition-b5290431169b8c00dafda9db895c0d188d98401e\Speaker-Recognition-b5290431169b8c00dafda9db895c0d188d98401e\sidekit\features_extractor.py in save_list(self, show_list, channel_list, audio_file_list, feature_file_list, noise_file_list, snr_list, reverb_file_list, reverb_levels, num_thread)
    764                                                                                                        reverb_levels):
    765 
--> 766             self.save(show, channel, audio_file, feature_file, noise_file, snr, reverb_file, reverb_level)

~\Machine learning\Speaker-Recognition-b5290431169b8c00dafda9db895c0d188d98401e\Speaker-Recognition-b5290431169b8c00dafda9db895c0d188d98401e\sidekit\features_extractor.py in save(self, show, channel, input_audio_filename, output_feature_filename, noise_file_name, snr, reverb_file_name, reverb_level)
    506                            snr=snr,
    507                            reverb_file_name=reverb_file_name,
--> 508                            reverb_level=reverb_level)
    509         logging.info(h5f.filename)
    510 

~\Machine learning\Speaker-Recognition-b5290431169b8c00dafda9db895c0d188d98401e\Speaker-Recognition-b5290431169b8c00dafda9db895c0d188d98401e\sidekit\features_extractor.py in extract(self, show, channel, input_audio_filename, output_feature_filename, backing_store, noise_file_name, snr, reverb_file_name, reverb_level)
    351 
    352         # Open audio file, get the signal and possibly the sampling frequency
--> 353         signal, sample_rate = read_audio(audio_filename, self.sampling_frequency)
    354         if signal.ndim == 1:
    355             signal = signal[:, numpy.newaxis]

~\Machine learning\Speaker-Recognition-b5290431169b8c00dafda9db895c0d188d98401e\Speaker-Recognition-b5290431169b8c00dafda9db895c0d188d98401e\sidekit\frontend\io.py in read_audio(input_file_name, framerate)
    419         sig, read_framerate, sampwidth = read_sph(input_file_name, 'p')
    420     elif ext.lower() == '.wav' or ext.lower() == '.wave':
--> 421         sig, read_framerate, sampwidth = read_wav(input_file_name)
    422     elif ext.lower() == '.pcm' or ext.lower() == '.raw':
    423         sig, read_framerate, sampwidth = read_pcm(input_file_name)

~\Machine learning\Speaker-Recognition-b5290431169b8c00dafda9db895c0d188d98401e\Speaker-Recognition-b5290431169b8c00dafda9db895c0d188d98401e\sidekit\frontend\io.py in read_wav(input_file_name)
    116     :return:
    117     """
--> 118     with wave.open(input_file_name, "r") as wfh:
    119         (nchannels, sampwidth, framerate, nframes, comptype, compname) = wfh.getparams()
    120         raw = wfh.readframes(nframes * nchannels)

~\Anaconda3\lib\wave.py in open(f, mode)
    497             mode = 'rb'
    498     if mode in ('r', 'rb'):
--> 499         return Wave_read(f)
    500     elif mode in ('w', 'wb'):
    501         return Wave_write(f)

~\Anaconda3\lib\wave.py in __init__(self, f)
    161         # else, assume it is an open file object already
    162         try:
--> 163             self.initfp(f)
    164         except:
    165             if self._i_opened_the_file:

~\Anaconda3\lib\wave.py in initfp(self, file)
    141             chunkname = chunk.getname()
    142             if chunkname == b'fmt ':
--> 143                 self._read_fmt_chunk(chunk)
    144                 self._fmt_chunk_read = 1
    145             elif chunkname == b'data':

~\Anaconda3\lib\wave.py in _read_fmt_chunk(self, chunk)
    258             self._sampwidth = (sampwidth + 7) // 8
    259         else:
--> 260             raise Error('unknown format: %r' % (wFormatTag,))
    261         self._framesize = self._nchannels * self._sampwidth
    262         self._comptype = 'NONE'

Error: unknown format: 65534

@IsmaelAh commented on b529043 May 25, 2019

When I run the code using sidekit from pip in a Jupyter notebook, this is the code:

Cell 1: (the same feature-extraction code as in my previous comment)

Cell 2:

#UBM Training
ubm_list = os.listdir(os.path.join(base_dir, "features")) # make sure this directory only contains the feature files extracted above
for i in range(len(ubm_list)):
    ubm_list[i] = ubm_list[i].split(".h5")[0]

server = sidekit.FeaturesServer(features_extractor=None,
                                feature_filename_structure=os.path.join(feature_dir, "{}.h5"),
                                sources=None,
                                dataset_list=["vad", "energy", "cep", "fb"],
                                feat_norm="cmvn",
                                global_cmvn=None,
                                dct_pca=False,
                                dct_pca_config=None,
                                sdc=False,
                                sdc_config=None,
                                delta=True,
                                double_delta=True,
                                delta_filter=None,
                                context=None,
                                traps_dct_nb=None,
                                rasta=False,
                                keep_all_features=False)


ubm = sidekit.Mixture()


ubm.EM_split(features_server=server,
             feature_list=ubm_list,
             distrib_nb=16,
             iterations=(1, 2),
             num_thread=1,
             save_partial=True,
#              ceil_cov=10,
#              floor_cov=1e-2
             )

Cell 3:

task_dir = os.path.join(base_dir, "task")
def create_idMap( ):
    
        # Make enrollment (IdMap) file list
        group_dir = os.path.join(base_dir,"audio")
        group_files = sorted(os.listdir(group_dir))
        # list of model IDs
        group_models = [files.split('.')[0] for files in group_files]
        print(group_models)
        # list of audio segments IDs
        group_segments = [files for files in group_files]
        print(group_segments)
        # Generate IdMap
        
        group_idmap = sidekit.IdMap()
        group_idmap.leftids = np.asarray(group_models)
        group_idmap.rightids = np.asarray(group_segments)
        group_idmap.start = np.empty(group_idmap.rightids.shape, '|O')
        group_idmap.stop = np.empty(group_idmap.rightids.shape, '|O')
        if group_idmap.validate():
            group_idmap.write(os.path.join(task_dir, "hh"+'_idmap.h5'))
            group_idmap.write(os.path.join(task_dir, 'tv_idmap.h5'))
            group_idmap.write(os.path.join(task_dir, 'plda_idmap.h5'))
        else:
            raise RuntimeError('Problems with creating idMap file')

   
create_idMap( )

Cell 4:

enroll_idmap = sidekit.IdMap.read(os.path.join(base_dir, "task", "hh_idmap.h5"))
enroll_stat = sidekit.StatServer(statserver_file_name=enroll_idmap,
                                 distrib_nb=16,
                                 feature_size=60)
# Compute the sufficient statistics
enroll_stat.accumulate_stat(ubm=ubm,
                            feature_server=server,
                            seg_indices=range(enroll_stat.segset.shape[0]),
                            num_thread=1)

The code runs until Cell 4, which gives this error:

C:\Users\new--laptop\Anaconda3\lib\site-packages\h5py\_hl\dataset.py:313: H5pyDeprecationWarning: dataset.value has been deprecated. Use dataset[()] instead.
  "Use dataset[()] instead.", H5pyDeprecationWarning)
---------------------------------------------------------------------------

ValueError                                Traceback (most recent call last)
<ipython-input-8-9cf671a03982> in <module>
      5 enroll_stat.accumulate_stat(ubm=ubm,
      6 feature_server=server,
----> 7  seg_indices=range(enroll_stat.segset.shape[0]),num_thread=1)

~\Anaconda3\lib\site-packages\sidekit\sidekit_wrappers.py in wrapper(*args, **kwargs)
    227         else:
    228             logging.debug("No Parallel processing with this module")
--> 229             func(*args, **kwargs)
    230 
    231     return wrapper

~\Anaconda3\lib\site-packages\sidekit\statserver.py in accumulate_stat(self, ubm, feature_server, seg_indices, channel_extension, num_thread)
    721                 pp, foo = sum_log_probabilities(lp)
    722                 # Compute 0th-order statistics
--> 723                 self.stat0[idx, :] = pp.sum(0)
    724                 # Compute 1st-order statistics
    725                 self.stat1[idx, :] = numpy.reshape(numpy.transpose(numpy.dot(data.transpose(), pp)), ubm.sv_size()).astype(STAT_TYPE)

ValueError: could not broadcast input array from shape (4) into shape (16)

@Anwarvic (Owner, Author) commented on b529043

First Issue

First, let's start with the first issue... May I ask you for a favor? Go to the repo of this project on GitHub and open the file extract_features.py. You will see that I specifically pointed out to use extractor.save instead of extractor.save_list, as the latter is prone to error: "it is buggy and freezes after some time. I don't recommend using it". So, I suggest using save() instead of save_list(), which can be done like so:

for show, channel in zip(show_list, channel_list):
    extractor.save(show, channel)

Instead of:

extractor.save_list(show_list=show_list,
                     channel_list=channel_list,
                     num_thread=1)

Why is that the case? I'm not sure; it might be the order of the files.
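If you want to narrow it down, here is a variation on the loop above (just a sketch, I haven't run it on your data) that reports and skips any file save() cannot read instead of aborting the whole run:

for show, channel in zip(show_list, channel_list):
    try:
        extractor.save(show, channel)
    except Exception as e:
        # a problematic file (e.g. a non-PCM wav) is reported and skipped
        print("skipping {}: {}".format(show, e))

Also, the wave error in your traceback, unknown format: 65534, means the wav format tag is 0xFFFE, i.e. WAVE_FORMAT_EXTENSIBLE, which Python's standard-library wave module cannot parse. If that is what your files use, re-encoding them as plain 16-bit PCM should help. A minimal sketch, assuming the third-party soundfile package is installed and wav_dir points to your audio folder:

import os
import soundfile as sf

wav_dir = "C:/Users/new--laptop/Documents/Speaker-Recognition-master/audio"

# Rewrite every wav file in place as plain 16-bit PCM so wave.open() can read it
for name in os.listdir(wav_dir):
    if name.lower().endswith(".wav"):
        path = os.path.join(wav_dir, name)
        data, sample_rate = sf.read(path)
        sf.write(path, data, sample_rate, subtype="PCM_16")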

Second Issue

As for the second issue, I can see that you have used:

enroll_stat = sidekit.StatServer(statserver_file_name=enroll_idmap,
                                 distrib_nb=16,
                                 feature_size=60)

I don't recommend that at all; the feature_size and distrib_nb should be taken from the UBM model. So, I suggest using:

enroll_stat = sidekit.StatServer(statserver_file_name=enroll_idmap,
                                 ubm=ubm)
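Putting the two together, your Cell 4 would then look like this (an untested sketch; it assumes ubm and server are still in scope from the earlier cells):

enroll_idmap = sidekit.IdMap.read(os.path.join(base_dir, "task", "hh_idmap.h5"))
# distrib_nb and feature_size are now taken from the UBM itself
enroll_stat = sidekit.StatServer(statserver_file_name=enroll_idmap,
                                 ubm=ubm)
enroll_stat.accumulate_stat(ubm=ubm,
                            feature_server=server,
                            seg_indices=range(enroll_stat.segset.shape[0]),
                            num_thread=1)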

Little Note

Commenting on commits isn't the best way to solve code problems, for multiple reasons:

  • To get to these comments, I had to click on the notification icon and then click on Read. If there are no new comments, I won't be able to find them again unless I remember which commit you commented on.
  • These comments are visible only to people who open this commit, which isn't very helpful since this project has about 120 commits so far, and it still needs work.

So, I suggest writing up any problems you have in the issues panel, and check this link for reference.

I know the code is messy and I have made a lot of changes in a short period of time. So, thanks for sticking with me during this period :)
