forked from meta-toolkit/meta
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathranker_test.cpp
107 lines (88 loc) · 3.14 KB
/
ranker_test.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
/**
* @file ranker_test.cpp
* @author Sean Massung
*/
#include "bandit/bandit.h"
#include "create_config.h"
#include "meta/corpus/document.h"
#include "meta/index/ranker/all.h"
#include "meta/index/forward_index.h"
using namespace bandit;
using namespace meta;
namespace
{

/**
 * Runs a set of retrieval sanity checks for one ranker against an index.
 *
 * First, every document in the index is used as a query: its text is read
 * from the file named by its "path" metadata entry. The document itself is
 * expected to come back at rank one, or at rank two with a score within
 * 0.0001 of rank one (the corpus has a few duplicate documents). Afterwards
 * a single-word query is issued and its results are checked for score
 * diversity and for being sorted by non-increasing score.
 *
 * @param r the ranker under test
 * @param idx the index that is searched
 * @param encoding the encoding handed to each per-document query when its
 * content is set
 */
template <class Ranker, class Index>
void test_rank(Ranker& r, Index& idx, const std::string& encoding)
{
    // exhaustive search for each document
    for (size_t i = 0; i < idx.num_docs(); ++i)
    {
        auto d_id = idx.docs()[i];
        auto path = *idx.template metadata<std::string>(d_id, "path");

        corpus::document query{doc_id{i}};
        query.content(filesystem::file_text(path), encoding);

        auto ranking = r.score(idx, query);
        AssertThat(ranking.size(), Equals(10ul)); // default is 10 docs

        // since we're searching for a document already in the index, the same
        // document should be ranked first, but there are a few duplicate
        // documents, so accept second place when it ties with first place
        if (ranking[0].d_id != i)
        {
            AssertThat(ranking[1].d_id, Equals(i));
            AssertThat(ranking[0].score,
                       EqualsWithDelta(ranking[1].score, 0.0001));
        }
    }

    // sanity checks for simple query
    corpus::document query;
    query.content("character");
    auto ranking = r.score(idx, query);

    // an empty result must show up as a test failure; without this check the
    // element accesses below would index into an empty container
    AssertThat(ranking.size(), Is().GreaterThan(0ul));

    // ensure there is diversity in the top 10 documents
    AssertThat(ranking[0].score, Is().GreaterThan(ranking.back().score));

    // check for sorted-ness of ranking
    for (uint64_t i = 1; i < ranking.size(); ++i)
    {
        AssertThat(ranking[i - 1].score,
                   Is().GreaterThanOrEqualTo(ranking[i].score));
    }
}
}
// Registers the "[rankers]" spec: one inverted index is built up front and
// every ranker is run through test_rank() against it.
go_bandit([]() {
    describe("[rankers]", []() {
        // remove any existing "ceeaus" path before the index is created
        auto cfg = tests::create_config("file");
        filesystem::remove_all("ceeaus");
        auto inv_idx = index::make_index<index::inverted_index>(*cfg);

        // "utf-8" is used unless the config carries an "encoding" entry
        std::string doc_encoding{"utf-8"};
        auto configured = cfg->get_as<std::string>("encoding");
        if (configured)
        {
            doc_encoding = *configured;
        }

        it("should be able to rank with absolute discounting", [&]() {
            index::absolute_discount ranker;
            test_rank(ranker, *inv_idx, doc_encoding);
        });

        it("should be able to rank with Dirichlet prior", [&]() {
            index::dirichlet_prior ranker;
            test_rank(ranker, *inv_idx, doc_encoding);
        });

        it("should be able to rank with Jelinek-Mercer", [&]() {
            index::jelinek_mercer ranker;
            test_rank(ranker, *inv_idx, doc_encoding);
        });

        it("should be able to rank with Okapi BM25", [&]() {
            index::okapi_bm25 ranker;
            test_rank(ranker, *inv_idx, doc_encoding);
        });

        it("should be able to rank with pivoted length normalization", [&]() {
            index::pivoted_length ranker;
            test_rank(ranker, *inv_idx, doc_encoding);
        });

        // this ranker is constructed from a forward index built from the
        // same config
        it("should be able to rank with KL-divergence pseudo-relevance "
           "feedback",
           [&]() {
               index::kl_divergence_prf ranker{
                   index::make_index<index::forward_index>(*cfg)};
               test_rank(ranker, *inv_idx, doc_encoding);
           });

        // reset the index pointer first, then remove "ceeaus" again
        inv_idx = nullptr;
        filesystem::remove_all("ceeaus");
    });
});