-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathstrategies.py
60 lines (51 loc) · 1.65 KB
/
strategies.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
from string import ascii_letters
from hypothesis import strategies as st
from hypothesis.strategies import composite
from conivel.datas.datas import NERSentence
@composite
def ner_sentence(
    draw,
    min_len: int = 0,
    max_len: int = 100,
    left_ctx_min_nb: int = 0,
    left_ctx_max_nb: int = 0,
    right_ctx_min_nb: int = 0,
    right_ctx_max_nb: int = 0,
) -> NERSentence:
    """A strategy that generates NER sentences.

    Tokens are short ASCII-letter strings (1 to 5 characters) and tags
    are sampled independently from ``"O"``, ``"B-PER"`` and ``"I-PER"``.
    Tokens and tags always have the same length.

    Context sentences are generated with the same ``min_len`` /
    ``max_len`` bounds but without any context of their own.

    :param draw: the draw function supplied by ``@composite``
    :param min_len: min size of generated ``NERSentence``
    :param max_len: max size of generated ``NERSentence``
    :param left_ctx_min_nb: min number of left context sentences to
        generate
    :param left_ctx_max_nb: max number of left context sentences to
        generate
    :param right_ctx_min_nb: min number of right context sentences to
        generate
    :param right_ctx_max_nb: max number of right context sentences to
        generate
    :return: a generated ``NERSentence``
    """
    # Draw the length first so that tokens and tags are guaranteed to
    # have exactly the same number of elements.
    sent_len = draw(st.integers(min_value=min_len, max_value=max_len))

    tokens = draw(
        st.lists(
            st.text(alphabet=ascii_letters, min_size=1, max_size=5),
            min_size=sent_len,
            max_size=sent_len,
        )
    )
    tags = draw(
        st.lists(
            st.sampled_from(["O", "B-PER", "I-PER"]),
            min_size=sent_len,
            max_size=sent_len,
        )
    )

    # The number of context sentences must lie in [min_nb, max_nb].
    # Iterating over ``range(min_nb, max_nb)`` would instead always
    # produce exactly ``max_nb - min_nb`` sentences, so let ``st.lists``
    # pick a size within the documented bounds.
    left_ctx = draw(
        st.lists(
            ner_sentence(min_len, max_len),
            min_size=left_ctx_min_nb,
            max_size=left_ctx_max_nb,
        )
    )
    right_ctx = draw(
        st.lists(
            ner_sentence(min_len, max_len),
            min_size=right_ctx_min_nb,
            max_size=right_ctx_max_nb,
        )
    )

    return NERSentence(tokens, tags, left_ctx, right_ctx)