-
Notifications
You must be signed in to change notification settings - Fork 42
/
Copy pathdata_loader.py
67 lines (46 loc) · 1.74 KB
/
data_loader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import os
import numpy as np
# Base directory that holds one sub-folder per dataset. The loaders below
# build file names as data_path + args.dataset + "/<file>", so the trailing
# slash is required. The path is relative, so it is resolved against the
# current working directory of the running process.
data_path = "../home-made-data/"
def load_data(args):
    """Load the rating data and the knowledge graph for one dataset.

    Args:
        args: Configuration object, forwarded unchanged to ``load_rating``
            and ``load_kg``.

    Returns:
        A 7-tuple ``(n_user, n_item, n_entity, n_relation, train_data,
        test_data, kg)`` assembled from the results of the two loaders.
    """
    # Ratings are read first, then the knowledge graph.
    user_count, item_count, train_split, test_split = load_rating(args)
    entity_count, relation_count, kg_triples = load_kg(args)

    print("data loaded.")

    return (
        user_count,
        item_count,
        entity_count,
        relation_count,
        train_split,
        test_split,
        kg_triples,
    )
def load_rating(args):
    """Read the rating matrix of a dataset and split it into train/test.

    The file ``ratings_final`` is looked up under ``data_path + args.dataset``.
    A binary ``.npy`` copy is used when present; otherwise the ``.txt``
    version is parsed as int32 and a ``.npy`` copy is written next to it so
    that later runs can skip the text parsing.

    Args:
        args: Object with a ``dataset`` attribute naming the dataset folder.

    Returns:
        ``(n_user, n_item, train_data, test_data)`` where ``n_user`` and
        ``n_item`` are the numbers of distinct values in columns 0 and 1 of
        the rating array, and the two data arrays come from ``dataset_split``.
    """
    print("reading rating file ...")

    base_name = data_path + args.dataset + "/ratings_final"
    cache_name = base_name + ".npy"

    if not os.path.exists(cache_name):
        # No binary cache yet: parse the text file once and store the result.
        rating_np = np.loadtxt(base_name + ".txt", dtype=np.int32)
        np.save(cache_name, rating_np)
    else:
        rating_np = np.load(cache_name)

    user_column = rating_np[:, 0]
    item_column = rating_np[:, 1]
    n_user = len(set(user_column))
    n_item = len(set(item_column))

    train_data, test_data = dataset_split(rating_np)
    return n_user, n_item, train_data, test_data
def dataset_split(rating_np, test_ratio=0.2):
    """Randomly split the rows of ``rating_np`` into train and test sets.

    Test rows are drawn without replacement from NumPy's global random
    state (``np.random``), so seeding it with ``np.random.seed`` makes the
    split reproducible. Every row ends up in exactly one of the two outputs.

    Args:
        rating_np: 2-D NumPy array; rows are the samples to split.
        test_ratio: Fraction of rows assigned to the test set. The test
            size is ``int(n_rows * test_ratio)``. Defaults to 0.2
            (train:test = 8:2).

    Returns:
        ``(train_data, test_data)``. ``test_data`` rows follow the random
        draw order; ``train_data`` rows keep their original relative order.

    Raises:
        ValueError: If ``test_ratio`` is outside ``[0, 1]``.
    """
    print("splitting dataset ...")

    if not 0.0 <= test_ratio <= 1.0:
        raise ValueError(f"test_ratio must be within [0, 1], got {test_ratio!r}")

    n_ratings = rating_np.shape[0]
    n_test = int(n_ratings * test_ratio)

    # Passing the integer population size samples from arange(n_ratings)
    # directly, without first materialising a Python list of every index.
    test_indices = np.random.choice(n_ratings, size=n_test, replace=False)

    # A boolean mask yields the complement of the test indices in ascending
    # order, replacing the per-element Python set arithmetic.
    is_train = np.ones(n_ratings, dtype=bool)
    is_train[test_indices] = False

    train_data = rating_np[is_train]
    test_data = rating_np[test_indices]
    return train_data, test_data
def load_kg(args):
    """Read the knowledge-graph array of a dataset.

    The file ``kg_final`` is looked up under ``data_path + args.dataset``.
    A binary ``.npy`` copy is used when present; otherwise the ``.txt``
    version is parsed as int32 and a ``.npy`` copy is written next to it.

    Args:
        args: Object with a ``dataset`` attribute naming the dataset folder.

    Returns:
        ``(n_entity, n_relation, kg)`` where ``n_entity`` is the number of
        distinct values found in columns 0 and 2 combined, ``n_relation``
        the number of distinct values in column 1, and ``kg`` the loaded
        array (presumably one (head, relation, tail) triple per row --
        confirm against the data files).
    """
    print("reading KG file ...")

    base_name = data_path + args.dataset + "/kg_final"
    cache_name = base_name + ".npy"

    if not os.path.exists(cache_name):
        # No binary cache yet: parse the text file once and store the result.
        kg = np.loadtxt(base_name + ".txt", dtype=np.int32)
        np.save(cache_name, kg)
    else:
        kg = np.load(cache_name)

    # Entities can appear in either the first or the third column.
    first_column_ids = set(kg[:, 0])
    third_column_ids = set(kg[:, 2])
    n_entity = len(first_column_ids | third_column_ids)

    relation_ids = set(kg[:, 1])
    n_relation = len(relation_ids)

    return n_entity, n_relation, kg