# config.yml
# Source repository: fork of lgalke/vec4ir.
---
# Embedding models, keyed by model name.
# Every entry has a `path`; `glove` and `word2vec` additionally declare an
# `oov_token` and an `analyzer` mapping (lowercase / tokenizer / stop_words).
# NOTE(review): the nesting below was reconstructed from key order after the
# original indentation was lost -- confirm against the code that reads this
# file.
embeddings:
  glove:
    # path: "/data3/lgalke/glove.840B.300d.w2v_format.txt"
    path: "/data3/lgalke/glove.gnsm"
    oov_token: "UNK"  # wild guess
    analyzer:
      lowercase: false
      tokenizer: "nltk"
      stop_words: false
  word2vec:
    path: "/data3/lgalke/GoogleNews-vectors-negative300.bin.gz"
    oov_token: "UNK"  # not so wild guess
    analyzer:
      lowercase: false
      tokenizer: "sword"
      stop_words: true
  # doc2vec declares only a model path (no oov_token / analyzer).
  doc2vec:
    path: "/data3/lgalke/1000_dim_wiki_doc2vec.model"
# Datasets, keyed by dataset name.
# Two entry shapes appear here, selected by `type`:
#   - "QuadflorLike": `X`, `y` and `thes` paths.
#   - "ntcir": `root_path` plus `field`, `topic` and `rels`.
# NOTE(review): presumably X = documents, y = gold-standard labels and
# thes = thesaurus; the meaning of `rels` is not visible here -- verify
# against the dataset loaders.
# NOTE(review): the nesting below was reconstructed from key order after the
# original indentation was lost -- confirm against the code that reads this
# file.
data:
  econ62k:
    type: "QuadflorLike"
    X:
      "/data3/kd-mp/kd-mp/Resources/Goldstandard/formatted_econbiz-annotation-62k-titles.csv"
    y:
      "/data3/kd-mp/kd-mp/Resources/Goldstandard/econbiz-stw9-formatted.csv"
    thes: "/data3/kd-mp/kd-mp/Resources/Ontologies/stw.json"
  # Same y / thes as econ62k, but X points at a directory-like path.
  econfull:
    type: "QuadflorLike"
    X: "/data3/kd-mp/data/ZBW62k/Text"
    y:
      "/data3/kd-mp/kd-mp/Resources/Goldstandard/econbiz-stw9-formatted.csv"
    thes: "/data3/kd-mp/kd-mp/Resources/Ontologies/stw.json"
  reuters:
    type: "QuadflorLike"
    X: "/data11/fmai/data/TREC-Reuters/rcv1_titles_100000.tsv"
    y: "/data11/fmai/data/TREC-Reuters/rcv1_gold100000.tsv"
    thes: "/data3/kd-mp/kd-mp/Resources/Ontologies/reuters_dict_fixed.json"
  # Same y / thes as reuters, with a different X path.
  reutersfull:
    type: "QuadflorLike"
    X: "/data11/fmai/data/TREC-Reuters/rcv1_fulltext_100000"
    y: "/data11/fmai/data/TREC-Reuters/rcv1_gold100000.tsv"
    thes: "/data3/kd-mp/kd-mp/Resources/Ontologies/reuters_dict_fixed.json"
  # The four NTCIR2 variants share root_path and rels; they differ only in
  # `field` (title | content) and `topic` (title | description).
  short-ntcir2:
    type: "ntcir"
    root_path: "../data/NTCIR2/"
    field: "title"
    topic: "title"
    rels: 2
  short-ntcir2abstract:
    type: "ntcir"
    root_path: "../data/NTCIR2/"
    field: "content"
    topic: "title"
    rels: 2
  long-ntcir2:
    type: "ntcir"
    root_path: "../data/NTCIR2/"
    field: "title"
    topic: "description"
    rels: 2
  long-ntcir2abstract:
    type: "ntcir"
    root_path: "../data/NTCIR2/"
    field: "content"
    topic: "description"
    rels: 2