forked from NicolasHug/Surprise
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtest_algorithms.py
116 lines (95 loc) · 3.82 KB
/
test_algorithms.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
"""
Module for testing prediction algorithms.
"""
from __future__ import (absolute_import, division, print_function,
unicode_literals)
import os
import pytest
from six import iteritems
from surprise import NormalPredictor
from surprise import BaselineOnly
from surprise import KNNBasic
from surprise import KNNWithMeans
from surprise import KNNBaseline
from surprise import SVD
from surprise import SVDpp
from surprise import NMF
from surprise import SlopeOne
from surprise import CoClustering
from surprise import Dataset
from surprise import Reader
from surprise import KNNWithZScore
from surprise.model_selection import train_test_split
from surprise import accuracy
def test_unknown_user_or_item(toy_data):
    """Check that every algorithm copes with ids absent from the trainset.

    Each algorithm class is instantiated with its default arguments, fitted
    on the full trainset built from ``toy_data``, and asked for three
    predictions: unknown item only, unknown user only, and both unknown.
    The returned predictions are not inspected; the test passes as long as
    none of the calls raises.

    A second pass exercises the chained ``fit(...).test(...)`` form on a
    train/test split, and checks that the ``train(...)`` entry point emits a
    ``UserWarning``.
    """
    algo_classes = (NormalPredictor, BaselineOnly, KNNBasic, KNNWithMeans,
                    KNNBaseline, SVD, SVDpp, NMF, SlopeOne, CoClustering,
                    KNNWithZScore)

    # (user id, item id) pairs where at least one side is not in the data.
    unknown_queries = (
        ('user0', 'unknown_item'),
        ('unkown_user', 'item0'),
        ('unkown_user', 'unknown_item'),
    )

    full_trainset = toy_data.build_full_trainset()
    for algo_class in algo_classes:
        fitted = algo_class()
        fitted.fit(full_trainset)
        for user_id, item_id in unknown_queries:
            # The third positional argument is deliberately passed as None.
            fitted.predict(user_id, item_id, None)

    # Not related to unknown ids: also cover the fit().test() one-liner.
    split_trainset, split_testset = train_test_split(toy_data, test_size=2)
    for algo_class in algo_classes:
        model = algo_class()
        model.fit(split_trainset).test(split_testset)
        # train() must still be chainable, but is expected to warn.
        with pytest.warns(UserWarning):
            model.train(split_trainset).test(split_testset)
def test_knns(u1_ml100k, pkf):
    """Check that ``k`` and ``min_k`` bound the neighbourhood actually used.

    For every (trainset, testset) pair yielded by ``pkf.split(u1_ml100k)``
    the algorithm is re-fitted, and each prediction that was not flagged as
    impossible must report an ``actual_k`` between ``min_k`` and ``k``
    (both inclusive).
    """
    max_neighbors, min_neighbors = 20, 5

    # Only KNNBasic is checked: KNNWithMeans and KNNBaseline have back-up
    # solutions for when there are not enough neighbors, so the bound cannot
    # really be tested on them.
    knn_classes = (KNNBasic, )

    for knn_class in knn_classes:
        algo = knn_class(k=max_neighbors, min_k=min_neighbors)
        for trainset, testset in pkf.split(u1_ml100k):
            algo.fit(trainset)
            for prediction in algo.test(testset):
                details = prediction.details
                if details['was_impossible']:
                    # No neighbourhood was used, so there is nothing to check.
                    continue
                assert min_neighbors <= details['actual_k'] <= max_neighbors
def test_nearest_neighbors():
    """Ensure the nearest neighbors are different when using user-user
    similarity vs item-item.

    A dataset is loaded from the ``custom_train`` file located in the same
    directory as this module, two ``KNNBasic`` models are fitted on the full
    trainset (one with ``user_based`` True, one with it False), and the
    neighbors of inner id 0 (``k=10``) returned by each model must differ.
    """
    reader = Reader(line_format='user item rating', sep=' ', skip_lines=3)

    # Build the path with os.path.join instead of concatenating a hard-coded
    # '/' separator, so the path is assembled the same way on every platform.
    tests_dir = os.path.dirname(os.path.realpath(__file__))
    data_file = os.path.join(tests_dir, 'custom_train')

    data = Dataset.load_from_file(data_file, reader, rating_scale=(1, 5))
    trainset = data.build_full_trainset()

    algo_ub = KNNBasic(sim_options={'user_based': True})
    algo_ub.fit(trainset)
    algo_ib = KNNBasic(sim_options={'user_based': False})
    algo_ib.fit(trainset)

    assert algo_ub.get_neighbors(0, k=10) != algo_ib.get_neighbors(0, k=10)
def test_sanity_checks(u1_ml100k, pkf):
    """
    Basic sanity checks for all algorithms: check that RMSE stays the same.

    Each algorithm is fitted on the first (trainset, testset) pair yielded by
    ``pkf.split(u1_ml100k)``; the RMSE of its predictions on that testset is
    then compared with a previously recorded reference value.
    """
    expected_rmse = {
        BaselineOnly: 1.0268524031297395,
        KNNBasic: 1.1337265249554591,
        KNNWithMeans: 1.1043129441881696,
        KNNBaseline: 1.0700718041752253,
        KNNWithZScore: 1.11179436167853,
        SVD: 1.0077323320656948,
        SVDpp: 1.00284553561452,
        NMF: 1.0865370266372372,
        SlopeOne: 1.1559939123891685,
        CoClustering: 1.0841941385276614,
    }

    for klass, rmse in iteritems(expected_rmse):
        # These classes get a fixed random_state so that their result is
        # repeatable (presumably they are randomised -- confirm in the
        # library); the others are built with default arguments only.
        if klass in (SVD, SVDpp, NMF, CoClustering):
            algo = klass(random_state=0)
        else:
            algo = klass()

        trainset, testset = next(pkf.split(u1_ml100k))
        algo.fit(trainset)
        predictions = algo.test(testset)

        # Compare within pytest.approx's default tolerance rather than with
        # exact float equality: last-digit differences between platforms or
        # numeric library builds should not fail the test, while any real
        # change in the RMSE still does.
        assert accuracy.rmse(predictions, verbose=False) == pytest.approx(rmse)