forked from Restream/reindexer
-
Notifications
You must be signed in to change notification settings - Fork 1
/
ftfastconfig.go
151 lines (147 loc) · 6.68 KB
/
ftfastconfig.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
package reindexer
// FtFastFieldConfig holds full-text ranking settings for a single field.
// Values of this type are listed in FtFastConfig.FieldsCfg; defaults are
// produced by DefaultFtFastFieldConfig.
type FtFastFieldConfig struct {
// Field name. NOTE(review): presumably the name of the indexed field these
// settings apply to - confirm against the consumer of this config.
FieldName string `json:"field_name"`
// Boost of bm25 ranking. Default value is 1.
Bm25Boost float64 `json:"bm25_boost"`
// Weight of bm25 rank in the final rank.
// 0: bm25 will not change the final rank.
// 1: bm25 will affect the final rank in the 0 - 100% range.
Bm25Weight float64 `json:"bm25_weight"`
// Boost of search query term length. Default value is 1.
TermLenBoost float64 `json:"term_len_boost"`
// Weight of search query term length in the final rank.
// 0: term length will not change the final rank.
// 1: term length will affect the final rank in the 0 - 100% range.
TermLenWeight float64 `json:"term_len_weight"`
// Boost of search query term position. Default value is 1.
PositionBoost float64 `json:"position_boost"`
// Weight of search query term position in the final rank.
// 0: term position will not change the final rank.
// 1: term position will affect the final rank in the 0 - 100% range.
PositionWeight float64 `json:"position_weight"`
}
// FtFastConfig is the configuration of a full-text search index.
// DefaultFtFastConfig returns a value filled with the default settings.
type FtFastConfig struct {
// Boost of bm25 ranking. Default value is 1.
Bm25Boost float64 `json:"bm25_boost"`
// Weight of bm25 rank in the final rank.
// 0: bm25 will not change the final rank.
// 1: bm25 will affect the final rank in the 0 - 100% range.
Bm25Weight float64 `json:"bm25_weight"`
// Boost of search query term distance in the found document. Default value is 1.
DistanceBoost float64 `json:"distance_boost"`
// Weight of search query terms distance in the found document in the final rank.
// 0: distance will not change the final rank.
// 1: distance will affect the final rank in the 0 - 100% range.
DistanceWeight float64 `json:"distance_weight"`
// Boost of search query term length. Default value is 1.
TermLenBoost float64 `json:"term_len_boost"`
// Weight of search query term length in the final rank.
// 0: term length will not change the final rank.
// 1: term length will affect the final rank in the 0 - 100% range.
TermLenWeight float64 `json:"term_len_weight"`
// Boost of search query term position. Default value is 1.
PositionBoost float64 `json:"position_boost"`
// Weight of search query term position in the final rank.
// 0: term position will not change the final rank.
// 1: term position will affect the final rank in the 0 - 100% range.
PositionWeight float64 `json:"position_weight"`
// Boost of a full match of the search phrase with the document.
FullMatchBoost float64 `json:"full_match_boost"`
// Relevancy step of partial match: relevancy = kFullMatchProc - partialMatchDecrease * (non matched symbols) / (matched symbols)
// For example: partialMatchDecrease: 15, word in index 'terminator', pattern 'termin'. matched: 6 symbols, unmatched: 4. relevancy = 100 - (15*4)/6 = 90
PartialMatchDecrease int `json:"partial_match_decrease"`
// Minimum rank of found documents.
MinRelevancy float64 `json:"min_relevancy"`
// Maximum possible typos in a word.
// 0: typos are disabled, words with typos will not match.
// N: words with N possible typos will match.
// It is not recommended to set more than 2 possible typos: it will seriously increase RAM usage and decrease search speed.
MaxTypos int `json:"max_typos"`
// Maximum word length for building and matching variants with typos. Default value is 15.
MaxTypoLen int `json:"max_typo_len"`
// Maximum commit steps - set it to 1 for always full rebuild - it can be from 1 to 500.
MaxRebuildSteps int `json:"max_rebuild_steps"`
// Maximum words in one commit - it can be from 5 to DOUBLE_MAX.
MaxStepSize int `json:"max_step_size"`
// Maximum number of documents which will be processed when merging query results.
// Default value is 20000. Increasing this value may refine ranking
// of queries with high frequency words.
MergeLimit int `json:"merge_limit"`
// List of used stemmers. DefaultFtFastConfig uses "en" and "ru".
Stemmers []string `json:"stemmers"`
// Enable translit variants processing.
EnableTranslit bool `json:"enable_translit"`
// Enable wrong keyboard layout variants processing.
EnableKbLayout bool `json:"enable_kb_layout"`
// List of stop words. Words from this list will be ignored in documents and queries.
StopWords []string `json:"stop_words"`
// List of synonyms for replacement.
Synonyms []struct {
// List of source tokens in the query, which will be replaced with alternatives.
Tokens []string `json:"tokens"`
// List of alternatives, which will be used to search documents.
Alternatives []string `json:"alternatives"`
} `json:"synonyms"`
// Log level of the full-text search engine.
LogLevel int `json:"log_level"`
// Enable search by numbers as words and backwards.
EnableNumbersSearch bool `json:"enable_numbers_search"`
// Enable auto index warmup after atomic namespace copy on transaction.
EnableWarmupOnNsCopy bool `json:"enable_warmup_on_ns_copy"`
// Extra symbols, which will be treated as parts of a word in addition to letters and digits.
ExtraWordSymbols string `json:"extra_word_symbols"`
// Ratio of summation of ranks of match of one term in several fields.
SumRanksByFieldsRatio float64 `json:"sum_ranks_by_fields_ratio"`
// Max number of highlighted areas for each field in each document (for snippet() and highlight()). '-1' means unlimited.
MaxAreasInDoc int `json:"max_areas_in_doc"`
// Max total number of highlighted areas in an ft result for which the result still remains cacheable. '-1' means unlimited.
MaxTotalAreasToCache int `json:"max_total_areas_to_cache"`
// Configuration for individual fields. Omitted from JSON when empty.
FieldsCfg []FtFastFieldConfig `json:"fields,omitempty"`
// Optimize the index by memory or by cpu. DefaultFtFastConfig uses "Memory".
// Omitted from JSON when empty.
Optimization string `json:"optimization,omitempty"`
// Enable execution of other queries before the ft query.
EnablePreselectBeforeFt bool `json:"enable_preselect_before_ft"`
}
// DefaultFtFastConfig returns an FtFastConfig filled with the default
// full-text index settings. Fields that are not assigned here (for example
// StopWords, Synonyms and FieldsCfg) keep their zero values.
func DefaultFtFastConfig() FtFastConfig {
	var cfg FtFastConfig

	// Ranking boosts and weights.
	cfg.Bm25Boost, cfg.Bm25Weight = 1.0, 0.1
	cfg.DistanceBoost, cfg.DistanceWeight = 1.0, 0.5
	cfg.TermLenBoost, cfg.TermLenWeight = 1.0, 0.3
	cfg.PositionBoost, cfg.PositionWeight = 1.0, 0.1
	cfg.FullMatchBoost = 1.1
	cfg.PartialMatchDecrease = 15
	cfg.MinRelevancy = 0.05
	cfg.SumRanksByFieldsRatio = 0.0

	// Typo tolerance.
	cfg.MaxTypos = 2
	cfg.MaxTypoLen = 15

	// Index build and merge limits.
	cfg.MaxRebuildSteps = 50
	cfg.MaxStepSize = 4000
	cfg.MergeLimit = 20000

	// Text processing.
	cfg.Stemmers = []string{"en", "ru"}
	cfg.EnableTranslit = true
	cfg.EnableKbLayout = true
	cfg.ExtraWordSymbols = "/-+"

	// Highlighting limits; -1 means unlimited.
	cfg.MaxAreasInDoc = 5
	cfg.MaxTotalAreasToCache = -1

	// Engine behaviour.
	cfg.LogLevel = 0
	cfg.Optimization = "Memory"
	cfg.EnablePreselectBeforeFt = false

	return cfg
}
// DefaultFtFastFieldConfig returns an FtFastFieldConfig for fieldName with
// the default per-field ranking boosts and weights.
func DefaultFtFastFieldConfig(fieldName string) FtFastFieldConfig {
	cfg := FtFastFieldConfig{FieldName: fieldName}

	// Each ranking component is set as a (boost, weight) pair.
	cfg.Bm25Boost, cfg.Bm25Weight = 1.0, 0.1
	cfg.TermLenBoost, cfg.TermLenWeight = 1.0, 0.3
	cfg.PositionBoost, cfg.PositionWeight = 1.0, 0.1

	return cfg
}