forked from nyaadevs/nyaa
-
Notifications
You must be signed in to change notification settings - Fork 0
/
es_mapping.yml
123 lines (122 loc) · 3.12 KB
/
es_mapping.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
---
# CREATE DATABASE/TABLE equivalent for elasticsearch, in yaml
# for inline comments.
# Index-level settings: the custom analysis chain (analyzers, token filters,
# char filter) and the index options.
settings:
  analysis:
    analyzer:
      # standard tokenizer + my_char_filter, then standard/lowercase filters.
      # NOTE(review): presumably the query-time analyzer; no field mapping
      # shown here references it, so confirm against the search code.
      my_search_analyzer:
        type: custom
        tokenizer: standard
        char_filter:
          - my_char_filter
        filter:
          - standard
          - lowercase
      # Analyzer used by the display_name field: adds resolution capture,
      # edge n-grams, word splitting and leading-zero trimming.
      my_index_analyzer:
        type: custom
        tokenizer: standard
        char_filter:
          - my_char_filter
        filter:
          - resolution
          - lowercase
          - my_ngram
          - word_delimit
          - trim_zero
      # For matching full words longer than the ngram limit (15 chars)
      my_fullword_index_analyzer:
        type: custom
        tokenizer: standard
        char_filter:
          - my_char_filter
        filter:
          - lowercase
          - word_delimit
          # These should be enough, as my_index_analyzer will match the rest

    filter:
      # Edge n-grams from 1 up to 15 characters.
      my_ngram:
        type: edgeNGram
        min_gram: 1
        max_gram: 15
      # Captures the digit runs on either side of an x/X (e.g. 1920x1080).
      resolution:
        type: pattern_capture
        patterns: ["(\\d+)[xX](\\d+)"]
      # Captures the digits that follow any leading zeros.
      trim_zero:
        type: pattern_capture
        patterns: ["0*([0-9]*)"]
      # Splits words on delimiters but keeps the original token and does not
      # split at letter/number boundaries.
      word_delimit:
        type: word_delimiter
        preserve_original: true
        split_on_numerics: false

    char_filter:
      # Maps "-" and "!" to "_", and "_" to a space (\u0020).
      my_char_filter:
        type: mapping
        mappings: ["-=>_", "!=>_", "_=>\\u0020"]

  index:
    # we're running a single es node, so no sharding necessary,
    # plus replicas don't really help either.
    number_of_shards: 1
    number_of_replicas: 0
    mapper:
      # disable elasticsearch's "helpful" autoschema
      dynamic: false
    # since we disabled the _all field, default query the
    # name of the torrent.
    query:
      default_field: display_name
# Field mappings for the "torrent" document type.
mappings:
  torrent:
    # don't want everything concatenated
    _all:
      enabled: false
    properties:
      id:
        type: long
      display_name:
        # TODO could do a fancier tokenizer here to parse out the
        # scene convention of stuff in brackets, plus stuff like k-on
        type: text
        analyzer: my_index_analyzer
        fielddata: true  # Is this required?
        fields:
          # Multi-field for full-word matching (when going over ngram limits)
          # Note: will have to be queried for, not automatic
          fullword:
            type: text
            analyzer: my_fullword_index_analyzer
      created_time:
        type: date
      # Only in the ES index for generating magnet links
      # (enabled: false, so the value is not parsed or indexed for search)
      info_hash:
        enabled: false
      filesize:
        type: long

      # Boolean flags.
      anonymous:
        type: boolean
      trusted:
        type: boolean
      remake:
        type: boolean
      complete:
        type: boolean
      hidden:
        type: boolean
      deleted:
        type: boolean
      has_torrent:
        type: boolean

      # Counters.
      download_count:
        type: long
      leech_count:
        type: long
      seed_count:
        type: long
      comment_count:
        type: long

      # these ids are really only for filtering, thus keyword
      uploader_id:
        type: keyword
      main_category_id:
        type: keyword
      sub_category_id:
        type: keyword