-
Notifications
You must be signed in to change notification settings - Fork 14
/
Copy pathevaluation.py
131 lines (112 loc) · 4.8 KB
/
evaluation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
import torch
import numpy as np
import multiprocessing
import time
import dataset
import feature_extraction
import neural_net
import myconfig
def run_inference(features, encoder,
                  full_sequence=myconfig.USE_FULL_SEQUENCE_INFERENCE):
    """Get the embedding of an utterance using the encoder.

    Args:
        features: numpy array of utterance features. It is converted with
            torch.from_numpy, so it must be a numpy array.
            NOTE(review): presumably shaped (frames, feature_dim) - confirm
            against feature_extraction.
        encoder: callable model; invoked on a float tensor that has been moved
            to myconfig.DEVICE.
        full_sequence: if true, feed the whole utterance as a batch of one;
            otherwise split it with
            feature_extraction.extract_sliding_windows and average the
            per-window outputs.

    Returns:
        A numpy array holding the embedding, or None when sliding-window
        inference is requested and no sliding window could be extracted.
    """
    # Inference never needs gradients; disabling autograd avoids building a
    # graph for every forward pass. The computed values are unchanged.
    with torch.no_grad():
        if full_sequence:
            # Full sequence inference: add a leading batch dimension of 1.
            batch_input = torch.unsqueeze(
                torch.from_numpy(features), dim=0).float().to(myconfig.DEVICE)
            batch_output = encoder(batch_input)
            # Take the single batch entry back out.
            return batch_output[0, :].detach().cpu().numpy()

        # Sliding window inference.
        sliding_windows = feature_extraction.extract_sliding_windows(features)
        if not sliding_windows:
            return None
        batch_input = torch.from_numpy(
            np.stack(sliding_windows)).float().to(myconfig.DEVICE)
        batch_output = encoder(batch_input)
        # Aggregate the inference outputs from sliding windows by averaging
        # over the window (batch) dimension.
        aggregated_output = torch.mean(batch_output, dim=0, keepdim=False)
        return aggregated_output.detach().cpu().numpy()
def cosine_similarity(a, b):
    """Return the cosine of the angle between embeddings `a` and `b`.

    The value is the dot product of the two vectors divided by the product
    of their Euclidean norms.
    """
    norm_product = np.linalg.norm(a) * np.linalg.norm(b)
    inner = np.dot(a, b)
    return inner / norm_product
class TripletScoreFetcher:
    """Callable that scores one triplet per invocation.

    Instances are handed to a pool's `map`, so all state needed for scoring
    is captured at construction time.
    """

    def __init__(self, spk_to_utts, encoder, num_eval_triplets):
        # Source passed to feature_extraction.get_triplet_features.
        self.spk_to_utts = spk_to_utts
        # Model used to embed each utterance of the triplet.
        self.encoder = encoder
        # Only used for the progress message.
        self.num_eval_triplets = num_eval_triplets

    def __call__(self, i):
        """Return (labels, scores) for one triplet.

        `i` is only used in the progress message. The result is
        ([1, 0], [anchor-vs-positive score, anchor-vs-negative score]), or
        ([], []) when any of the three embeddings is unavailable.
        """
        anchor, pos, neg = feature_extraction.get_triplet_features(
            self.spk_to_utts)
        embeddings = [
            run_inference(utterance, self.encoder)
            for utterance in (anchor, pos, neg)
        ]
        if any(embedding is None for embedding in embeddings):
            # Some utterances might be smaller than a single sliding window.
            return ([], [])

        anchor_embedding, pos_embedding, neg_embedding = embeddings
        triplet_scores = [
            cosine_similarity(anchor_embedding, other)
            for other in (pos_embedding, neg_embedding)
        ]
        print("triplets evaluated:", i, "/", self.num_eval_triplets)
        return ([1, 0], triplet_scores)
def compute_scores(encoder, spk_to_utts, num_eval_triplets=myconfig.NUM_EVAL_TRIPLETS):
    """Compute cosine similarity scores from testing data.

    Triplets are scored by a TripletScoreFetcher on a thread pool of
    myconfig.NUM_PROCESSES workers. A triplet that yields no scores is not
    counted, so batches are re-issued until `num_eval_triplets` triplets
    have been collected.

    Args:
        encoder: model forwarded to TripletScoreFetcher.
        spk_to_utts: data source forwarded to TripletScoreFetcher.
        num_eval_triplets: number of successfully scored triplets to collect.

    Returns:
        A (labels, scores) tuple of flat lists with two entries per triplet.
    """
    # Import the submodule explicitly: a bare `import multiprocessing` does
    # not by itself load `multiprocessing.pool`, so attribute access on the
    # package only works if some other module happened to import it first.
    from multiprocessing.pool import ThreadPool

    labels = []
    scores = []
    fetcher = TripletScoreFetcher(spk_to_utts, encoder, num_eval_triplets)
    # CUDA does not support multi-processing, so using a ThreadPool.
    with ThreadPool(myconfig.NUM_PROCESSES) as pool:
        # Each triplet contributes two labels, hence len(labels) // 2.
        # NOTE(review): if every triplet keeps coming back empty this loop
        # never terminates - confirm the data always has usable utterances.
        while num_eval_triplets > len(labels) // 2:
            label_score_pairs = pool.map(
                fetcher, range(len(labels) // 2, num_eval_triplets))
            for triplet_labels, triplet_scores in label_score_pairs:
                labels.extend(triplet_labels)
                scores.extend(triplet_scores)
    print("Evaluated", len(labels) // 2, "triplets in total")
    return (labels, scores)
def compute_eer(labels, scores, threshold_step=None):
    """Compute the Equal Error Rate (EER).

    A decision threshold is swept upward from 0.0 in increments of
    `threshold_step` while it stays below 1.0. At each threshold a score is
    accepted when it is >= the threshold. The threshold where the false
    accept rate (FAR) and false reject rate (FRR) are closest is selected;
    on ties the lowest such threshold wins.

    Args:
        labels: sequence of 0/1 labels (1 = positive pair, 0 = negative pair).
        scores: sequence of similarity scores, same length as `labels`.
        threshold_step: increment of the threshold sweep. Defaults to
            myconfig.EVAL_THRESHOLD_STEP when None.

    Returns:
        A tuple (eer, eer_threshold), where eer is the mean of FAR and FRR at
        the selected threshold.

    Raises:
        ValueError: if the lengths differ, if `threshold_step` is not
            positive, or if `labels` lacks positive or negative examples.
    """
    if len(labels) != len(scores):
        raise ValueError("Length of labels and scores must match")
    if threshold_step is None:
        threshold_step = myconfig.EVAL_THRESHOLD_STEP
    if threshold_step <= 0:
        # A non-positive step would make the sweep below loop forever.
        raise ValueError("threshold_step must be positive")

    num_positives = sum(labels)
    num_negatives = len(labels) - num_positives
    if num_positives == 0 or num_negatives == 0:
        # FAR or FRR would divide by zero and the EER is undefined.
        raise ValueError(
            "labels must contain both positive and negative examples")

    eer = None
    eer_threshold = None
    # Start at infinity so the first threshold is always recorded, even when
    # |FAR - FRR| equals its maximum of 1 over the whole sweep.
    min_delta = float("inf")
    threshold = 0.0
    while threshold < 1.0:
        # Accepted negatives are false accepts; rejected positives are false
        # rejects.
        false_accepts = sum(
            1 - label for score, label in zip(scores, labels)
            if score >= threshold)
        false_rejects = sum(
            label for score, label in zip(scores, labels)
            if not score >= threshold)
        far = false_accepts / num_negatives
        frr = false_rejects / num_positives
        delta = abs(far - frr)
        if delta < min_delta:
            min_delta = delta
            eer = (far + frr) / 2
            eer_threshold = threshold
        threshold += threshold_step
    return eer, eer_threshold
def run_eval():
    """Evaluate the saved model on the test data and print the EER.

    The test set comes from myconfig.TEST_DATA_CSV when that setting is
    truthy, otherwise from myconfig.TEST_DATA_DIR. The encoder is loaded from
    myconfig.SAVED_MODEL_PATH, and the elapsed wall-clock time is printed
    together with the EER and its threshold.
    """
    began = time.time()

    csv_path = myconfig.TEST_DATA_CSV
    if not csv_path:
        data_source = myconfig.TEST_DATA_DIR
        spk_to_utts = dataset.get_librispeech_spk_to_utts(data_source)
    else:
        data_source = csv_path
        spk_to_utts = dataset.get_csv_spk_to_utts(data_source)
    print("Evaluation data:", data_source)

    encoder = neural_net.get_speaker_encoder(myconfig.SAVED_MODEL_PATH)
    labels, scores = compute_scores(
        encoder, spk_to_utts, myconfig.NUM_EVAL_TRIPLETS)
    eer, eer_threshold = compute_eer(labels, scores)

    # The timing covers data loading, model loading, scoring and EER search.
    elapsed = time.time() - began
    print("Finished evaluation in", elapsed, "seconds")
    print("eer_threshold =", eer_threshold, "eer =", eer)
# Run the evaluation only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    run_eval()