test_deployment.py
import asyncio
import random

import pytest

from deepeval import assert_test
from deepeval.dataset import EvaluationDataset
from deepeval.metrics import BaseMetric
from deepeval.test_case import LLMTestCase

# Inherit from BaseMetric to define a custom metric.
class FakeMetric(BaseMetric):
    # This dummy metric returns a random score and succeeds whenever
    # that score meets or exceeds the threshold (0.5 by default).
    def __init__(self, threshold: float = 0.5):
        super().__init__()
        self.threshold = threshold

    def measure(self, test_case: LLMTestCase):
        # Set self.score and self.success in the "measure" method.
        self.score = random.uniform(0.5, 1.0)
        self.success = self.score >= self.threshold
        # You can also optionally set a reason for the score returned.
        # This is particularly useful for a score computed using LLMs.
        self.reason = "This metric is looking good!"
        return self.score

    async def a_measure(self, test_case: LLMTestCase):
        self.score = random.uniform(0.5, 1.0)
        self.success = self.score >= self.threshold
        # You can also optionally set a reason for the score returned.
        # This is particularly useful for a score computed using LLMs.
        self.reason = "This async metric is looking good!"
        # Simulate a slow metric without blocking the event loop.
        random_sleep_time = random.uniform(1, 5)
        await asyncio.sleep(random_sleep_time)
        return self.score

    def is_successful(self):
        return self.success

    @property
    def __name__(self):
        return "Coherence"

dataset = EvaluationDataset()
# Pull from Confident
dataset.pull(alias="test")


@pytest.mark.parametrize(
    "test_case",
    dataset,
)
def test_customer_chatbot(test_case: LLMTestCase):
    fake_metric = FakeMetric()
    assert_test(test_case, [fake_metric])
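
# A file like this would typically be executed with deepeval's pytest runner,
# e.g. `deepeval test run test_deployment.py`. This sketch assumes the
# deepeval CLI is installed and that you are logged in to Confident AI,
# since dataset.pull(alias="test") fetches the dataset from the cloud.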