-
-
Notifications
You must be signed in to change notification settings - Fork 648
/
Copy pathutils.py
41 lines (28 loc) · 1.48 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
import torch
from dataset import TransformerDataset
from datasets import load_dataset
from model import TransformerModel
from transformers import AutoTokenizer
from ignite.handlers import DiskSaver
def get_tokenizer(tokenizer_name, tokenizer_dir):
    """Load a pretrained Hugging Face tokenizer.

    Files are cached under *tokenizer_dir*; lower-casing is enabled to
    match uncased model checkpoints.
    """
    return AutoTokenizer.from_pretrained(
        tokenizer_name, cache_dir=tokenizer_dir, do_lower_case=True
    )
def get_model(model_name, model_dir, drop_out, n_fc, num_classes):
    """Construct the project's transformer classification model.

    All arguments are forwarded verbatim to ``TransformerModel``.
    """
    return TransformerModel(model_name, model_dir, drop_out, n_fc, num_classes)
def get_dataset(cache_dir, tokenizer_name, tokenizer_dir, max_length):
    """Download the IMDB corpus and wrap both splits for training.

    Returns a ``(train, test)`` pair of ``TransformerDataset`` objects
    that tokenize texts up to *max_length* tokens.
    """
    raw_splits = load_dataset("imdb", split=["train", "test"], cache_dir=cache_dir)
    tok = get_tokenizer(tokenizer_name, tokenizer_dir)
    wrapped = tuple(
        TransformerDataset(split["text"], split["label"], tok, max_length)
        for split in raw_splits
    )
    return wrapped
def thresholded_output_transform(output):
    """Binarize raw logits for binary-classification metrics.

    Parameters
    ----------
    output : tuple
        ``(y_pred, y)`` where ``y_pred`` holds raw (pre-sigmoid) logits.

    Returns
    -------
    tuple
        Hard 0/1 predictions (probability thresholded at 0.5) and the
        unchanged targets.
    """
    logits, targets = output
    probabilities = torch.sigmoid(logits)
    return torch.round(probabilities), targets
def get_save_handler(config):
    """Pick the checkpoint saver for this run.

    Uses a ClearML-backed saver when ``config["with_clearml"]`` is truthy
    (imported lazily so ClearML is only required when enabled); otherwise
    falls back to writing checkpoints to ``config["output_dir"]`` on disk.
    """
    if not config["with_clearml"]:
        return DiskSaver(config["output_dir"], require_empty=False)
    from ignite.handlers.clearml_logger import ClearMLSaver
    return ClearMLSaver(dirname=config["output_dir"])