Skip to content

Commit b112d99

Browse files
author
daniil.kulko
committed Aug 30, 2021
PEP fixes
1 parent 8f725b2 commit b112d99

8 files changed

+304
-335
lines changed
 

‎VBx/VB_diarization.py

+148-196
Large diffs are not rendered by default.

‎VBx/diarization_lib.py

+61-61
Original file line numberDiff line numberDiff line change
@@ -20,14 +20,14 @@ def twoGMMcalib_lin(s, niters=20):
2020
var = np.var(s)
2121
threshold = np.inf
2222
for _ in range(niters):
23-
lls = np.log(weights)-0.5*np.log(var) - 0.5*(s[:,np.newaxis]-means)**2/var
23+
lls = np.log(weights) - 0.5 * np.log(var) - 0.5 * (s[:, np.newaxis] - means)**2 / var
2424
gammas = softmax(lls, axis=1)
2525
cnts = np.sum(gammas, axis=0)
2626
weights = cnts / cnts.sum()
2727
means = s.dot(gammas) / cnts
2828
var = ((s**2).dot(gammas) / cnts - means**2).dot(weights)
29-
threshold = -0.5*(np.log(weights**2/var)-means**2/var).dot([1,-1])/(means/var).dot([1,-1])
30-
return threshold, lls[:,means.argmax()]-lls[:,means.argmin()]
29+
threshold = -0.5 * (np.log(weights**2 / var) - means**2 / var).dot([1, -1]) / (means/var).dot([1, -1])
30+
return threshold, lls[:, means.argmax()] - lls[:, means.argmin()]
3131

3232

3333
def AHC(sim_mx, threshold=0):
@@ -41,18 +41,19 @@ def AHC(sim_mx, threshold=0):
4141
cluster labels stored in an array of length N containing (integers in
4242
the range from 0 to C-1, where C is the number of discovered clusters)
4343
"""
44-
dist = -sim_mx;
44+
dist = -sim_mx
4545
dist[np.diag_indices_from(dist)] = np.inf
4646
clsts = [[i] for i in range(len(dist))]
4747
while True:
4848
mi, mj = np.sort(np.unravel_index(dist.argmin(), dist.shape))
4949
if dist[mi, mj] > -threshold:
5050
break
51-
dist[:, mi] = dist[mi,:] = (dist[mi,:]*len(clsts[mi])+dist[mj,:]*len(clsts[mj]))/(len(clsts[mi])+len(clsts[mj]))
52-
dist[:, mj] = dist[mj,:] = np.inf
51+
dist[:, mi] = dist[mi, :] = (dist[mi, :]*len(clsts[mi]) + dist[mj, :]*len(clsts[mj])) / \
52+
(len(clsts[mi]) + len(clsts[mj]))
53+
dist[:, mj] = dist[mj, :] = np.inf
5354
clsts[mi].extend(clsts[mj])
5455
clsts[mj] = None
55-
labs= np.empty(len(dist), dtype=int)
56+
labs = np.empty(len(dist), dtype=int)
5657
for i, c in enumerate([e for e in clsts if e]):
5758
labs[c] = i
5859
return labs
@@ -73,14 +74,14 @@ def PLDA_scoring_in_LDA_space(Fe, Ft, diagAC):
7374
"""
7475
# See (7-8) in L. Burget et al.: "Discriminatively trained probabilistic
7576
# linear discriminant analysis for speaker verification", in ICASSP 2011.
76-
iTC = 1.0 / (1 + diagAC)
77-
iWC2AC = 1.0 / (1 + 2*diagAC)
78-
ldTC = np.sum(np.log(1 + diagAC))
77+
iTC = 1.0 / (1 + diagAC)
78+
iWC2AC = 1.0 / (1 + 2*diagAC)
79+
ldTC = np.sum(np.log(1 + diagAC))
7980
ldWC2AC = np.sum(np.log(1 + 2*diagAC))
80-
Gamma = -0.25*(iWC2AC + 1 - 2*iTC)
81-
Lambda= -0.5 *(iWC2AC - 1)
82-
k = - 0.5*(ldWC2AC - 2*ldTC)
83-
return np.dot(Fe * Lambda, Ft.T) + (Fe**2).dot(Gamma)[:,np.newaxis] + (Ft**2).dot(Gamma) + k
81+
Gamma = -0.25 * (iWC2AC + 1 - 2*iTC)
82+
Lambda = -0.5 * (iWC2AC - 1)
83+
k = -0.5 * (ldWC2AC - 2*ldTC)
84+
return np.dot(Fe * Lambda, Ft.T) + (Fe**2).dot(Gamma)[:, np.newaxis] + (Ft**2).dot(Gamma) + k
8485

8586

8687
def kaldi_ivector_plda_scoring_dense(kaldi_plda, x, target_energy=0.1, pca_dim=None):
@@ -102,23 +103,21 @@ def kaldi_ivector_plda_scoring_dense(kaldi_plda, x, target_energy=0.1, pca_dim=N
102103
matrix of pairwise similarities between the input x-vectors
103104
"""
104105
plda_mu, plda_tr, plda_psi = kaldi_plda
105-
[energy,PCA]=spl.eigh(np.cov(x.T, bias=True))
106+
energy, PCA = spl.eigh(np.cov(x.T, bias=True))
106107
if pca_dim is None:
107-
energy=np.cumsum(energy[::-1])
108-
pca_dim=np.sum(energy/energy[-1]<=target_energy) + 2
109-
# we need at least 2 dimensions, so 2 more dimensions are always added
108+
energy = np.cumsum(energy[::-1])
109+
pca_dim = np.sum(energy/energy[-1] <= target_energy) + 2
110+
# we need at least 2 dimensions, so 2 more dimensions are always added
110111

111-
PCA=PCA[:,:-pca_dim-1:-1]
112+
PCA = PCA[:, :-pca_dim-1:-1]
112113
print("pca_dim:", pca_dim)
113114

114-
plda_tr_inv_pca=PCA.T.dot(np.linalg.inv(plda_tr))
115+
plda_tr_inv_pca = PCA.T.dot(np.linalg.inv(plda_tr))
115116
W = plda_tr_inv_pca.dot(plda_tr_inv_pca.T)
116117
B = (plda_tr_inv_pca*plda_psi).dot(plda_tr_inv_pca.T)
117-
acvar, wccn = spl.eigh(B, W)
118-
x = np.dot(x-plda_mu,PCA).dot(wccn)
119-
x *= np.sqrt(x.shape[1] / np.dot(x**2, 1.0 / (acvar + 1.0)))[:,np.newaxis] # kaldi style length-norm
120-
#Lambda, Gamma, c, k = PLDA_params_to_bilinear_form(np.eye(pca_dim), np.diag(acvar), np.zeros((pca_dim,)))
121-
#return bilinear_scoring(Lambda, Gamma, c, k, x, x)
118+
acvar, wccn = spl.eigh(B, W)
119+
x = np.dot(x-plda_mu, PCA).dot(wccn)
120+
x *= np.sqrt(x.shape[1] / np.dot(x**2, 1.0 / (acvar + 1.0)))[:, np.newaxis] # kaldi style length-norm
122121
return PLDA_scoring_in_LDA_space(x, x, acvar)
123122

124123

@@ -135,8 +134,8 @@ def read_xvector_timing_dict(kaldi_segments):
135134
segs_dict[recording_file_name] = (array_of_xvector_names, array_of_start_and_end_times)
136135
"""
137136
segs = np.loadtxt(kaldi_segments, dtype=object)
138-
split_by_filename = np.nonzero(segs[1:,1]!=segs[:-1,1])[0]+1
139-
return {s[0,1]: (s[:,0], s[:,2:].astype(float)) for s in np.split(segs, split_by_filename)}
137+
split_by_filename = np.nonzero(segs[1:, 1] != segs[:-1, 1])[0] + 1
138+
return {s[0, 1]: (s[:, 0], s[:, 2:].astype(float)) for s in np.split(segs, split_by_filename)}
140139

141140

142141
def merge_adjacent_labels(starts, ends, labels):
@@ -154,13 +153,13 @@ def merge_adjacent_labels(starts, ends, labels):
154153
# Merge neighbouring (or overlapping) segments with the same label
155154
adjacent_or_overlap = np.logical_or(np.isclose(ends[:-1], starts[1:]), ends[:-1] > starts[1:])
156155
to_split = np.nonzero(np.logical_or(~adjacent_or_overlap, labels[1:] != labels[:-1]))[0]
157-
starts = starts[np.r_[0, to_split+1]]
158-
ends = ends[np.r_[to_split, -1]]
159-
labels = labels[np.r_[0, to_split+1]]
160-
156+
starts = starts[np.r_[0, to_split+1]]
157+
ends = ends[np.r_[to_split, -1]]
158+
labels = labels[np.r_[0, to_split+1]]
159+
161160
# Fix starts and ends times for overlapping segments
162-
overlaping = np.nonzero(starts[1:]<ends[:-1])[0]
163-
ends[overlaping] = starts[overlaping+1] = (ends[overlaping]+starts[overlaping+1]) / 2.0
161+
overlaping = np.nonzero(starts[1:] < ends[:-1])[0]
162+
ends[overlaping] = starts[overlaping+1] = (ends[overlaping] + starts[overlaping+1]) / 2.0
164163
return starts, ends, labels
165164

166165

@@ -178,12 +177,12 @@ def segment_to_frame_labels(starts, ends, labels, length=0, frame_rate=100., emp
178177
frms - array of frame-by-frame labels
179178
"""
180179
min_len, max_len = (length, length) if length > 0 else (-length, None)
181-
starts = np.rint(frame_rate*starts).astype(int)
182-
ends = np.rint(frame_rate*ends ).astype(int)
180+
starts = np.rint(frame_rate * starts).astype(int)
181+
ends = np.rint(frame_rate * ends).astype(int)
183182
if not ends.size:
184-
return np.full(min_len, empty_label)
183+
return np.full(min_len, empty_label)
185184

186-
frms = np.repeat(np.r_[np.c_[[empty_label]*len(labels), labels ].flat, empty_label],
185+
frms = np.repeat(np.r_[np.c_[[empty_label]*len(labels), labels].flat, empty_label],
187186
np.r_[np.c_[starts - np.r_[0, ends[:-1]], ends-starts].flat, max(0, min_len-ends[-1])])
188187
return frms[:max_len]
189188

@@ -194,7 +193,8 @@ def mkdir_p(path):
194193
except OSError as exc:
195194
if exc.errno == errno.EEXIST and os.path.isdir(path):
196195
pass
197-
else: raise
196+
else:
197+
raise
198198

199199

200200
def l2_norm(vec_or_matrix):
@@ -216,26 +216,26 @@ def l2_norm(vec_or_matrix):
216216

217217

218218
def cos_similarity(x):
    """Compute cosine similarity matrix in CPU & memory sensitive way

    Args:
        x (np.ndarray): embeddings, 2D array, embeddings are in rows

    Returns:
        np.ndarray: cosine similarity matrix, float64, of shape (N, N) where
            N is the number of rows of x; an empty (0, 0) matrix when x has
            no rows

    Raises:
        AssertionError: if x is not 2D, if some row cannot be scaled to unit
            length (e.g. an all-zero row), or if a computed similarity falls
            outside the range [-1.0001, 1.0001]
    """
    assert x.ndim == 2, f'x has {x.ndim} dimensions, it must be matrix'
    n_rows = x.shape[0]
    if n_rows == 0:
        # No embeddings at all: the chunk-size computation below would
        # otherwise divide by zero.
        return np.zeros(shape=(0, 0), dtype=np.float64)

    # Scale every row to unit L2 norm; the tiny constant guards the division.
    x = x / (np.sqrt(np.sum(np.square(x), axis=1, keepdims=True)) + 1.0e-32)
    assert np.allclose(np.ones_like(x[:, 0]), np.sum(np.square(x), axis=1))

    # Process the embedding dimensions in chunks so that the temporary
    # (N, N, step) product array stays around max_n_elm elements.
    max_n_elm = 200000000
    step = max(max_n_elm // (n_rows * n_rows), 1)
    retval = np.zeros(shape=(n_rows, n_rows), dtype=np.float64)
    x0 = np.expand_dims(x, 0)
    x1 = np.expand_dims(x, 1)
    for i in range(0, x.shape[1], step):
        product = x0[:, :, i:i+step] * x1[:, :, i:i+step]
        retval += np.sum(product, axis=2, keepdims=False)
    assert np.all(retval >= -1.0001), retval
    assert np.all(retval <= 1.0001), retval
    return retval

‎VBx/features.py

+28-31
Original file line numberDiff line numberDiff line change
@@ -8,17 +8,17 @@
88

99
def framing(a, window, shift=1):
    """Cut `a` into frames of `window` items taken every `shift` items.

    Framing runs along the first axis of `a`. The result has shape
    (n_frames, window) + a.shape[1:] and is built with as_strided, i.e.
    without copying the data.
    """
    n_frames = (a.shape[0] - window) // shift + 1
    row_stride = a.strides[0]
    return np.lib.stride_tricks.as_strided(
        a,
        shape=(n_frames, window) + a.shape[1:],
        strides=(row_stride * shift, row_stride) + a.strides[1:],
    )
1313

1414

1515
# Mel and inverse Mel scale warping functions
1616
def mel_inv(x):
    """Inverse Mel warping: 700 * (exp(x / 1127) - 1)."""
    ratio = np.exp(x / 1127.)
    return 700. * (ratio - 1.)
1818

1919

2020
def mel(x):
    """Mel warping: 1127 * ln(1 + x / 700)."""
    scaled = x / 700. + 1.
    return np.log(scaled) * 1127.
2222

2323

2424
def preemphasis(x, coef=0.97):
@@ -31,7 +31,7 @@ def mel_fbank_mx(winlen_nfft, fs, NUMCHANS=20, LOFREQ=0.0, HIFREQ=None, warp_fn=
3131
used to determine number of samples for FFT computation (NFFT).
3232
If positive, the value (window length) is rounded up to the
3333
next higher power of two to obtain HTK-compatible NFFT.
34-
If negative, NFFT is set to -winlen_nfft. In such case, the
34+
If negative, NFFT is set to -winlen_nfft. In such case, the
3535
parameter nfft in mfcc_htk() call should be set likewise.
3636
fs - sampling frequency (Hz, i.e. 1e7/SOURCERATE)
3737
NUMCHANS - number of filter bank bands
@@ -40,30 +40,32 @@ def mel_fbank_mx(winlen_nfft, fs, NUMCHANS=20, LOFREQ=0.0, HIFREQ=None, warp_fn=
4040
warp_fn - function for frequency warping and its inverse
4141
inv_warp_fn - inverse function to warp_fn
4242
"""
43-
if not HIFREQ: HIFREQ = 0.5 * fs
43+
HIFREQ = 0.5 * fs if not HIFREQ else HIFREQ
4444
nfft = 2**int(np.ceil(np.log2(winlen_nfft))) if winlen_nfft > 0 else -int(winlen_nfft)
4545

4646
fbin_mel = warp_fn(np.arange(nfft / 2 + 1, dtype=float) * fs / nfft)
4747
cbin_mel = np.linspace(warp_fn(LOFREQ), warp_fn(HIFREQ), NUMCHANS + 2)
4848
cind = np.floor(inv_warp_fn(cbin_mel) / fs * nfft).astype(int) + 1
4949
mfb = np.zeros((len(fbin_mel), NUMCHANS))
5050
for i in range(NUMCHANS):
51-
mfb[cind[i] :cind[i+1], i] = (cbin_mel[i] -fbin_mel[cind[i] :cind[i+1]]) / (cbin_mel[i] -cbin_mel[i+1])
52-
mfb[cind[i+1]:cind[i+2], i] = (cbin_mel[i+2]-fbin_mel[cind[i+1]:cind[i+2]]) / (cbin_mel[i+2]-cbin_mel[i+1])
53-
if LOFREQ > 0.0 and float(LOFREQ)/fs*nfft+0.5 > cind[0] and htk_bug: mfb[cind[0],:] = 0.0 # Just to be HTK compatible
51+
mfb[cind[i]:cind[i+1], i] = (cbin_mel[i] - fbin_mel[cind[i]:cind[i+1]]) / (cbin_mel[i] - cbin_mel[i+1])
52+
mfb[cind[i+1]:cind[i+2], i] = (cbin_mel[i+2] - fbin_mel[cind[i+1]:cind[i+2]]) / \
53+
(cbin_mel[i+2] - cbin_mel[i+1])
54+
if LOFREQ > 0.0 and float(LOFREQ) / fs * nfft + 0.5 > cind[0] and htk_bug:
55+
mfb[cind[0], :] = 0.0 # Just to be HTK compatible
5456
return mfb
5557

5658

5759
def fbank_htk(x, window, noverlap, fbank_mx, nfft=None, _E=None,
58-
USEPOWER=False, RAWENERGY=True, PREEMCOEF=0.97, ZMEANSOURCE=False,
59-
ENORMALISE=True, ESCALE=0.1, SILFLOOR=50.0, USEHAMMING=True):
60+
USEPOWER=False, RAWENERGY=True, PREEMCOEF=0.97, ZMEANSOURCE=False,
61+
ENORMALISE=True, ESCALE=0.1, SILFLOOR=50.0, USEHAMMING=True):
6062
"""Mel log Mel-filter bank channel outputs
6163
Returns NUMCHANS-by-M matrix of log Mel-filter bank outputs extracted from
6264
signal x, where M is the number of extracted frames, which can be computed
6365
as floor((length(x)-noverlap)/(window-noverlap)). Remaining parameters
6466
have the following meaning:
6567
x - input signal
66-
window - frame window length (in samples, i.e. WINDOWSIZE/SOURCERATE)
68+
window - frame window length (in samples, i.e. WINDOWSIZE/SOURCERATE)
6769
or vector of window weights override default windowing function
6870
(see option USEHAMMING)
6971
noverlap - overlapping between frames (in samples, i.e. window-TARGETRATE/SOURCERATE)
@@ -80,13 +82,11 @@ def fbank_htk(x, window, noverlap, fbank_mx, nfft=None, _E=None,
8082
See also:
8183
mel_fbank_mx:
8284
to obtain the matrix for the parameter fbank_mx
83-
add_deriv:
85+
add_deriv:
8486
for adding delta, double delta, ... coefficients
8587
add_dither:
8688
for adding dithering in HTK-like fashion
8789
"""
88-
from time import time
89-
tm = time()
9090
if type(USEPOWER) == bool:
9191
USEPOWER += 1
9292
if np.isscalar(window):
@@ -95,40 +95,37 @@ def fbank_htk(x, window, noverlap, fbank_mx, nfft=None, _E=None,
9595
nfft = 2**int(np.ceil(np.log2(window.size)))
9696
x = framing(x.astype("float"), window.size, window.size-noverlap).copy()
9797
if ZMEANSOURCE:
98-
x -= x.mean(axis=1)[:,np.newaxis]
98+
x -= x.mean(axis=1)[:, np.newaxis]
9999
if _E is not None and RAWENERGY:
100100
energy = np.log((x**2).sum(axis=1))
101101
if PREEMCOEF is not None:
102102
x = preemphasis(x, PREEMCOEF)
103103
x *= window
104104
if _E is not None and not RAWENERGY:
105105
energy = np.log((x**2).sum(axis=1))
106-
#x = np.abs(scipy.fftpack.fft(x, nfft))
107-
#x = x[:,:x.shape[1]/2+1]
108106
x = np.fft.rfft(x, nfft)
109-
#x = np.abs(x)
110107
x = x.real**2 + x.imag**2
111108
if USEPOWER != 2:
112109
x **= 0.5 * USEPOWER
113110
x = np.log(np.maximum(1.0, np.dot(x, fbank_mx)))
114111
if _E is not None and ENORMALISE:
115-
energy = (energy - energy.max()) * ESCALE + 1.0
116-
min_val = -np.log(10**(SILFLOOR/10.)) * ESCALE + 1.0
112+
energy = (energy - energy.max()) * ESCALE + 1.0
113+
min_val = -np.log(10**(SILFLOOR/10.)) * ESCALE + 1.0
117114
energy[energy < min_val] = min_val
118115

119-
return np.hstack(([energy[:,np.newaxis]] if _E == "first" else []) + [x] +
120-
([energy[:,np.newaxis]] if (_E in ["last", True]) else []))
121-
116+
return np.hstack(([energy[:, np.newaxis]] if _E == "first" else []) + [x] +
117+
([energy[:, np.newaxis]] if (_E in ["last", True]) else []))
118+
122119

123120
def povey_window(winlen):
    """Return (0.5 - 0.5*cos(t))**0.85 for winlen points t spanning [0, 2*pi]."""
    phase = np.linspace(0, 2*np.pi, winlen)
    raised_cosine = 0.5 - 0.5*np.cos(phase)
    return np.power(raised_cosine, 0.85)
125122

126123

127124
def add_dither(x, level=8):
    """Return x plus uniform random noise drawn from [-level, level)."""
    # np.random.rand yields values in [0, 1); map them onto [-1, 1)
    noise = np.random.rand(*x.shape)*2 - 1
    return x + level * noise
129126

130127

131-
def cmvn_floating_kaldi(x, LC,RC, norm_vars=True):
128+
def cmvn_floating_kaldi(x, LC, RC, norm_vars=True):
132129
"""Mean and variance normalization over a floating window.
133130
x is the feature matrix (nframes x dim)
134131
LC, RC are the number of frames to the left and right defining the floating
@@ -139,11 +136,11 @@ def cmvn_floating_kaldi(x, LC,RC, norm_vars=True):
139136
Global normalization is used if nframes is less than LC+RC+1.
140137
"""
141138
N, dim = x.shape
142-
win_len = min(len(x), LC+RC+1)
143-
win_start = np.maximum(np.minimum(np.arange(-LC,N-LC), N-win_len), 0)
139+
win_len = min(len(x), LC+RC+1)
140+
win_start = np.maximum(np.minimum(np.arange(-LC, N-LC), N-win_len), 0)
144141
f = np.r_[np.zeros((1, dim)), np.cumsum(x, 0)]
145-
x = x - (f[win_start+win_len]-f[win_start])/win_len
142+
x = x - (f[win_start+win_len] - f[win_start]) / win_len
146143
if norm_vars:
147-
f = np.r_[np.zeros((1, dim)), np.cumsum(x**2, 0)]
148-
x /= np.sqrt((f[win_start+win_len]-f[win_start])/win_len)
144+
f = np.r_[np.zeros((1, dim)), np.cumsum(x**2, 0)]
145+
x /= np.sqrt((f[win_start+win_len] - f[win_start]) / win_len)
149146
return x

0 commit comments

Comments
 (0)