-
Notifications
You must be signed in to change notification settings - Fork 13
/
Copy pathdeal_reader.py
27 lines (22 loc) · 915 Bytes
/
deal_reader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
import os
from data_utils.data_reader import Dataset
import re
class DealDataset(Dataset):
    """Dataset of dialogues parsed from a ``<split>.txt`` file.

    Every non-blank line of the file must contain a
    ``<dialogue>...</dialogue>`` span. The text inside that span is cleaned
    (``<selection>``, ``THEM:`` and ``YOU:`` markers are replaced by spaces,
    runs of spaces are collapsed) and split into turns on ``<eos>``.
    """

    # Patterns are identical to the ones previously passed to ``re`` on every
    # line; they are compiled once here so the loop does no repeated lookups.
    _DIALOGUE_RE = re.compile(r'.*<dialogue>(.+)</dialogue>.*')
    _MARKER_RE = re.compile('<selection>|THEM:|YOU:')
    _SPACES_RE = re.compile(' +')

    def __init__(self, data_path, split='train'):
        """Load and parse ``<data_path>/<split>.txt``.

        Args:
            data_path: Directory that contains the split files.
            split: Name of the split; ``'.txt'`` is appended to form the
                file name.

        Raises:
            ValueError: If a non-blank line has no
                ``<dialogue>...</dialogue>`` span.
        """
        # NOTE(review): the base ``Dataset`` initializer is not invoked here
        # (same as before) -- confirm that is intended.
        self.idx = 0
        # One dict per parsed line:
        #   'context'   -> list of non-empty, stripped turn strings
        #   'agreement' -> False when the line contains '<no_agreement>'
        self.examples = []

        file_path = os.path.join(data_path, split + '.txt')
        with open(file_path) as f:
            # Iterate the file object directly instead of materialising
            # every line with readlines().
            for line_no, line in enumerate(f, 1):
                if not line.strip():
                    # Blank lines (e.g. a trailing newline at the end of the
                    # file) carry no example; previously they crashed the
                    # loader with an AttributeError on ``None.group``.
                    continue

                m = self._DIALOGUE_RE.match(line)
                if m is None:
                    raise ValueError(
                        '{}:{}: line has no <dialogue>...</dialogue> '
                        'section'.format(file_path, line_no)
                    )

                text = self._MARKER_RE.sub(' ', m.group(1))
                text = self._SPACES_RE.sub(' ', text).strip()
                turns = [t.strip() for t in text.split('<eos>') if t.strip()]

                self.examples.append({
                    'context': turns,
                    # The marker is looked up in the whole line, not only
                    # inside the dialogue span.
                    'agreement': '<no_agreement>' not in line,
                })
        # NOTE(review): disabled step kept from the original; it would keep
        # every second example only -- confirm the intent before re-enabling.
        # self.examples = self.examples[1::2]