# test_compute_embeddings.py (forked from UKPLab/sentence-transformers)
"""
Computes embeddings
"""
import unittest
from sentence_transformers import SentenceTransformer
import numpy as np
class ComputeEmbeddingsTest(unittest.TestCase):
def setUp(self):
self.model = SentenceTransformer('paraphrase-distilroberta-base-v1')
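
    # encode(output_value='token_embeddings') returns one tensor per input sentence,
    # with a row per token instead of a single pooled sentence vector.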
    def test_encode_token_embeddings(self):
        """
        Test that encode(output_value='token_embeddings') works
        """
        sent = [
            "Hello Word, a test sentence",
            "Here comes another sentence",
            "My final sentence",
            "Sentences",
            "Sentence five five five five five five five",
        ]
        emb = self.model.encode(sent, output_value='token_embeddings', batch_size=2)
        assert len(emb) == len(sent)
        for s, e in zip(sent, emb):
            assert len(self.model.tokenize([s])['input_ids'][0]) == e.shape[0]
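
    # A single string yields a 1-D vector, a list of strings a 2-D array; the expected
    # sums are reference values for the paraphrase-distilroberta-base-v1 model.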
    def test_encode_single_sentences(self):
        # Single sentence
        emb = self.model.encode("Hello Word, a test sentence")
        assert emb.shape == (768,)
        assert abs(np.sum(emb) - 7.9811716) < 0.001

        # Single sentence as list
        emb = self.model.encode(["Hello Word, a test sentence"])
        assert emb.shape == (1, 768)
        assert abs(np.sum(emb) - 7.9811716) < 0.001

        # Sentence list
        emb = self.model.encode(["Hello Word, a test sentence", "Here comes another sentence", "My final sentence"])
        assert emb.shape == (3, 768)
        assert abs(np.sum(emb) - 22.968266) < 0.001
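
    # normalize_embeddings=True should return unit-length vectors (L2 norm of 1).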
    def test_encode_normalize(self):
        emb = self.model.encode(
            ["Hello Word, a test sentence", "Here comes another sentence", "My final sentence"],
            normalize_embeddings=True,
        )
        assert emb.shape == (3, 768)
        for norm in np.linalg.norm(emb, axis=1):
            assert abs(norm - 1) < 0.001
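
    # Each (text, text) tuple is treated as a single sentence-pair input and yields one embedding.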
    def test_encode_tuple_sentences(self):
        # Input a sentence tuple
        emb = self.model.encode([("Hello Word, a test sentence", "Second input for model")])
        assert emb.shape == (1, 768)
        assert abs(np.sum(emb) - 9.503508) < 0.001

        # List of sentence tuples
        emb = self.model.encode([
            ("Hello Word, a test sentence", "Second input for model"),
            ("My second tuple", "With two inputs"),
            ("Final tuple", "final test"),
        ])
        assert emb.shape == (3, 768)
        assert abs(np.sum(emb) - 32.14627) < 0.001
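

if __name__ == "__main__":
    # Allows running this test module directly, e.g. `python test_compute_embeddings.py`.
    unittest.main()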