forked from masastack/MASA.Utils
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Elasticsearch init * chore: add MasaElasticClient * chore: Elasticsearch init * chore: elasticsearch init * chore: Improve MasaElasticClient * chore: Add DocumentExistsAsync * chore: adjust ElasticsearchRelations * chore: Support document batch operation * chore: adjust AddElasticsearch methods * chore: add AddElasticsearchClient methods * chore: adjust GetPaginatedListAsync methods * chore: add alias * chore: add GetIndexByAliasAsync * chore: adjust GetAliasByIndexAsync methods * chore: Adjust using references * chore: add pinyin Filter * chore: adjust MASA.Utils.Data.Elasticsearch * chore: change methods name * chore: Adjustment parameters * chore: replace default IndexName * chore: add readonly * chore: Optimize the AddElasticsearch method Optimize the AddElasticsearch method * chore: Simplified parameter names Co-authored-by: zhenlei520 <[email protected]>
- Loading branch information
1 parent
e419ebf
commit e45be56
Showing
60 changed files
with
1,807 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
103 changes: 103 additions & 0 deletions
103
src/Data/MASA.Utils.Data.Elasticsearch/Analysis/TokenFilters/IPinYinTokenFilter.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,103 @@ | ||
namespace MASA.Utils.Data.Elasticsearch.Analysis.TokenFilters; | ||
|
||
/// <summary>
/// Options of a pinyin token filter. Every property carries a
/// <see cref="DataMemberAttribute"/> whose <c>Name</c> is the snake_case setting it maps to.
/// </summary>
public interface IPinYinTokenFilter : ITokenFilter
{
    /// <summary>
    /// Emit the joined first letters of each syllable, e.g. 刘德华 -> ldh.
    /// </summary>
    [DataMember(Name = "keep_first_letter")]
    bool KeepFirstLetter { get; set; }

    /// <summary>
    /// Emit each first letter as its own term, e.g. 刘德华 -> l,d,h (default: false).
    /// NOTE: such short terms are very frequent, so query results may become too fuzzy.
    /// </summary>
    [DataMember(Name = "keep_separate_first_letter")]
    bool KeepSeparateFirstLetter { get; set; }

    /// <summary>
    /// Maximum length of the first-letter result.
    /// </summary>
    [DataMember(Name = "limit_first_letter_length")]
    int LimitFirstLetterLength { get; set; }

    /// <summary>
    /// Emit the full pinyin of every character as separate terms, e.g. 刘德华 -> [liu,de,hua].
    /// </summary>
    [DataMember(Name = "keep_full_pinyin")]
    bool KeepFullPinyin { get; set; }

    /// <summary>
    /// Emit the full pinyin joined into a single term, e.g. 刘德华 -> [liudehua].
    /// </summary>
    [DataMember(Name = "keep_joined_full_pinyin")]
    bool KeepJoinedFullPinyin { get; set; }

    /// <summary>
    /// Keep non-Chinese letters and numbers in the result.
    /// </summary>
    [DataMember(Name = "keep_none_chinese")]
    bool KeepNoneChinese { get; set; }

    /// <summary>
    /// Keep consecutive non-Chinese letters together as one term.
    /// Enabled: DJ音乐家 -> DJ,yin,yue,jia. Disabled: DJ音乐家 -> D,J,yin,yue,jia.
    /// NOTE: requires <c>keep_none_chinese</c> to be enabled first.
    /// </summary>
    [DataMember(Name = "keep_none_chinese_together")]
    bool KeepNoneChineseTogether { get; set; }

    /// <summary>
    /// Keep non-Chinese letters in the first-letter term, e.g. 刘德华AT2016 -> ldhat2016.
    /// </summary>
    [DataMember(Name = "keep_none_chinese_in_first_letter")]
    bool KeepNoneChineseInFirstLetter { get; set; }

    /// <summary>
    /// Keep non-Chinese letters in the joined full-pinyin term, e.g. 刘德华2016 -> liudehua2016.
    /// </summary>
    [DataMember(Name = "keep_none_chinese_in_joined_full_pinyin")]
    bool KeepNoneChineseInJoinedFullPinyin { get; set; }

    /// <summary>
    /// Split non-Chinese letters into separate pinyin terms when they spell pinyin,
    /// e.g. liudehuaalibaba13zhuanghan -> liu,de,hua,a,li,ba,ba,13,zhuang,han.
    /// NOTE: requires <c>keep_none_chinese</c> and <c>keep_none_chinese_together</c>
    /// to be enabled first.
    /// </summary>
    [DataMember(Name = "none_chinese_pinyin_tokenize")]
    bool NoneChinesePinyinTokenize { get; set; }

    /// <summary>
    /// Keep the original input as a term in addition to the generated ones.
    /// </summary>
    [DataMember(Name = "keep_original")]
    bool KeepOriginal { get; set; }

    /// <summary>
    /// Lowercase non-Chinese letters.
    /// </summary>
    [DataMember(Name = "lowercase")]
    bool Lowercase { get; set; }

    /// <summary>
    /// Maps to the <c>trim_whitespace</c> setting.
    /// NOTE(review): presumably trims surrounding whitespace from the input - confirm
    /// against the pinyin analysis plugin documentation.
    /// </summary>
    [DataMember(Name = "trim_whitespace")]
    bool TrimWhitespace { get; set; }

    /// <summary>
    /// Remove duplicated terms to save index space, e.g. de的 -> de.
    /// NOTE: position-related queries may be affected.
    /// </summary>
    [DataMember(Name = "remove_duplicated_term")]
    bool RemoveDuplicatedTerm { get; set; }

    /// <summary>
    /// Since 6.0 offsets are strictly constrained and overlapping tokens are not allowed.
    /// Enabling this option permits overlapping tokens by ignoring offsets; all
    /// position-related queries and highlighting then become incorrect, so use multi-fields
    /// with different settings per query purpose. Set to false if offsets are needed.
    /// </summary>
    [DataMember(Name = "ignore_pinyin_offset")]
    bool IgnorePinyinOffset { get; set; }
}
122 changes: 122 additions & 0 deletions
122
src/Data/MASA.Utils.Data.Elasticsearch/Analysis/TokenFilters/PinYinTokenFilterDescriptor.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,122 @@ | ||
namespace MASA.Utils.Data.Elasticsearch.Analysis.TokenFilters; | ||
|
||
/// <summary>
/// Descriptor for the <c>pinyin</c> token filter that also implements
/// <see cref="IPinYinTokenFilter"/> directly through public properties.
/// A new instance starts with most options enabled; <see cref="KeepSeparateFirstLetter"/>,
/// <see cref="TrimWhitespace"/> and <see cref="IgnorePinyinOffset"/> have no initializer
/// and therefore start as <c>false</c>.
/// </summary>
/// <remarks>
/// NOTE(review): offsets are not ignored by default here while several overlapping outputs
/// (first letter, full pinyin, joined pinyin, original) are enabled - confirm this
/// combination is intended for the targeted Elasticsearch version.
/// </remarks>
public class PinYinTokenFilterDescriptor
    : TokenFilterDescriptorBase<PinYinTokenFilterDescriptor, IPinYinTokenFilter>, IPinYinTokenFilter
{
    /// <summary>Token filter type name.</summary>
    protected override string Type => "pinyin";

    /// <summary>
    /// Emit the joined first letters of each syllable, e.g. 刘德华 -> ldh. Starts as true.
    /// </summary>
    [DataMember(Name = "keep_first_letter")]
    public bool KeepFirstLetter { get; set; } = true;

    /// <summary>
    /// Emit each first letter as its own term, e.g. 刘德华 -> l,d,h. Starts as false.
    /// NOTE: such short terms are very frequent, so query results may become too fuzzy.
    /// </summary>
    [DataMember(Name = "keep_separate_first_letter")]
    public bool KeepSeparateFirstLetter { get; set; }

    /// <summary>
    /// Maximum length of the first-letter result. Starts as 50.
    /// </summary>
    [DataMember(Name = "limit_first_letter_length")]
    public int LimitFirstLetterLength { get; set; } = 50;

    /// <summary>
    /// Emit the full pinyin of every character as separate terms,
    /// e.g. 刘德华 -> [liu,de,hua]. Starts as true.
    /// </summary>
    [DataMember(Name = "keep_full_pinyin")]
    public bool KeepFullPinyin { get; set; } = true;

    /// <summary>
    /// Emit the full pinyin joined into a single term, e.g. 刘德华 -> [liudehua]. Starts as true.
    /// </summary>
    [DataMember(Name = "keep_joined_full_pinyin")]
    public bool KeepJoinedFullPinyin { get; set; } = true;

    /// <summary>
    /// Keep non-Chinese letters and numbers in the result. Starts as true.
    /// </summary>
    [DataMember(Name = "keep_none_chinese")]
    public bool KeepNoneChinese { get; set; } = true;

    /// <summary>
    /// Keep consecutive non-Chinese letters together as one term. Starts as true.
    /// Enabled: DJ音乐家 -> DJ,yin,yue,jia. Disabled: DJ音乐家 -> D,J,yin,yue,jia.
    /// NOTE: requires <c>keep_none_chinese</c> to be enabled first.
    /// </summary>
    [DataMember(Name = "keep_none_chinese_together")]
    public bool KeepNoneChineseTogether { get; set; } = true;

    /// <summary>
    /// Keep non-Chinese letters in the first-letter term,
    /// e.g. 刘德华AT2016 -> ldhat2016. Starts as true.
    /// </summary>
    [DataMember(Name = "keep_none_chinese_in_first_letter")]
    public bool KeepNoneChineseInFirstLetter { get; set; } = true;

    /// <summary>
    /// Keep non-Chinese letters in the joined full-pinyin term,
    /// e.g. 刘德华2016 -> liudehua2016. Starts as true.
    /// </summary>
    [DataMember(Name = "keep_none_chinese_in_joined_full_pinyin")]
    public bool KeepNoneChineseInJoinedFullPinyin { get; set; } = true;

    /// <summary>
    /// Split non-Chinese letters into separate pinyin terms when they spell pinyin,
    /// e.g. liudehuaalibaba13zhuanghan -> liu,de,hua,a,li,ba,ba,13,zhuang,han. Starts as true.
    /// NOTE: requires <c>keep_none_chinese</c> and <c>keep_none_chinese_together</c>
    /// to be enabled first.
    /// </summary>
    [DataMember(Name = "none_chinese_pinyin_tokenize")]
    public bool NoneChinesePinyinTokenize { get; set; } = true;

    /// <summary>
    /// Keep the original input as a term in addition to the generated ones. Starts as true.
    /// </summary>
    [DataMember(Name = "keep_original")]
    public bool KeepOriginal { get; set; } = true;

    /// <summary>
    /// Lowercase non-Chinese letters. Starts as true.
    /// </summary>
    [DataMember(Name = "lowercase")]
    public bool Lowercase { get; set; } = true;

    /// <summary>
    /// Maps to the <c>trim_whitespace</c> setting. Starts as false.
    /// NOTE(review): presumably trims surrounding whitespace from the input - confirm
    /// against the pinyin analysis plugin documentation.
    /// </summary>
    [DataMember(Name = "trim_whitespace")]
    public bool TrimWhitespace { get; set; }

    /// <summary>
    /// Remove duplicated terms to save index space, e.g. de的 -> de. Starts as true.
    /// NOTE: position-related queries may be affected.
    /// </summary>
    [DataMember(Name = "remove_duplicated_term")]
    public bool RemoveDuplicatedTerm { get; set; } = true;

    /// <summary>
    /// Since 6.0 offsets are strictly constrained and overlapping tokens are not allowed.
    /// Enabling this option permits overlapping tokens by ignoring offsets; all
    /// position-related queries and highlighting then become incorrect, so use multi-fields
    /// with different settings per query purpose. Set to false if offsets are needed.
    /// Starts as false.
    /// </summary>
    [DataMember(Name = "ignore_pinyin_offset")]
    public bool IgnorePinyinOffset { get; set; }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
namespace MASA.Utils.Data.Elasticsearch; | ||
|
||
/// <summary>
/// Constant values declared in the <c>MASA.Utils.Data.Elasticsearch</c> namespace.
/// </summary>
public class Const
{
    /// <summary>
    /// The literal <c>"es"</c>.
    /// NOTE(review): presumably the name given to a client registered without an
    /// explicit name - confirm at the registration call sites.
    /// </summary>
    public const string DEFAULT_CLIENT_NAME = "es";
}
74 changes: 74 additions & 0 deletions
74
src/Data/MASA.Utils.Data.Elasticsearch/DefaultElasticsearchFactory.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
namespace MASA.Utils.Data.Elasticsearch; | ||
|
||
/// <summary>
/// Default <see cref="IElasticsearchFactory"/>. Turns the configured
/// <see cref="ElasticsearchRelations"/> entries into <see cref="IElasticClient"/> instances
/// and caches one client per relation name.
/// </summary>
public class DefaultElasticsearchFactory : IElasticsearchFactory
{
    private readonly Dictionary<string, ElasticsearchRelations> _relations;
    private readonly ConcurrentDictionary<string, IElasticClient> _elasticClients;

    /// <summary>
    /// Creates the factory over the relations held by <paramref name="options"/>.
    /// </summary>
    /// <param name="options">Source of the name-to-relation map used by this factory.</param>
    public DefaultElasticsearchFactory(ElasticsearchRelationsOptions options)
    {
        _relations = options.Relations;
        _elasticClients = new();
    }

    /// <summary>
    /// Wraps the default <see cref="IElasticClient"/> in a new <see cref="DefaultMasaElasticClient"/>.
    /// </summary>
    public IMasaElasticClient CreateClient()
    {
        return new DefaultMasaElasticClient(CreateElasticClient());
    }

    /// <summary>
    /// Wraps the <see cref="IElasticClient"/> registered as <paramref name="name"/> in a new
    /// <see cref="DefaultMasaElasticClient"/>.
    /// </summary>
    /// <param name="name">Relation name.</param>
    /// <exception cref="NotSupportedException">No relation is registered under that name.</exception>
    public IMasaElasticClient CreateClient(string name)
    {
        return new DefaultMasaElasticClient(CreateElasticClient(name));
    }

    /// <summary>
    /// Returns the client of the relation flagged <c>IsDefault</c>, or of the first
    /// registered relation when none is flagged.
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// No relation is registered, or more than one relation is flagged as default
    /// (the latter is raised by <c>SingleOrDefault</c>).
    /// </exception>
    public IElasticClient CreateElasticClient()
    {
        var elasticsearchRelation = _relations.Values.SingleOrDefault(r => r.IsDefault) ?? _relations.Values.FirstOrDefault();

        if (elasticsearchRelation == null)
            throw new InvalidOperationException("The default ElasticClient is not found, please check if Elasticsearch is added");

        return GetOrAddElasticClient(elasticsearchRelation.Name);
    }

    /// <summary>
    /// Returns the client of the relation registered as <paramref name="name"/>.
    /// </summary>
    /// <param name="name">Relation name.</param>
    /// <exception cref="NotSupportedException">No relation is registered under that name.</exception>
    public IElasticClient CreateElasticClient(string name)
    {
        if (!_relations.ContainsKey(name))
            throw new NotSupportedException($"The ElasticClient whose name is {name} is not found");

        return GetOrAddElasticClient(name);
    }

    // Returns the cached client for the name, building it on first use. Under contention
    // GetOrAdd may run the factory more than once, but only one result is stored.
    private IElasticClient GetOrAddElasticClient(string name)
        => _elasticClients.GetOrAdd(name, Create);

    // Builds a new client for an already-validated relation name.
    private IElasticClient Create(string name)
    {
        var relation = _relations[name];

        var settings = relation.UseConnectionPool
            ? GetConnectionSettingsConnectionPool(relation)
            : GetConnectionSettingsBySingleNode(relation);

        return new ElasticClient(settings);
    }

    // Single-node settings built from the first configured node. The caller-supplied Action
    // is applied here as well, so custom configuration is honoured whether or not a
    // connection pool is used.
    private ConnectionSettings GetConnectionSettingsBySingleNode(ElasticsearchRelations relation)
    {
        var settings = new ConnectionSettings(relation.Nodes[0]);

        relation.Action?.Invoke(settings);
        return settings;
    }

    // Static-connection-pool settings over all configured nodes; Randomize defaults to true
    // when no pool options are supplied.
    private ConnectionSettings GetConnectionSettingsConnectionPool(ElasticsearchRelations relation)
    {
        var pool = new StaticConnectionPool(
            relation.Nodes,
            relation.StaticConnectionPoolOptions?.Randomize ?? true,
            relation.StaticConnectionPoolOptions?.DateTimeProvider);

        var settings = new ConnectionSettings(
            pool,
            relation.ConnectionSettingsOptions?.Connection,
            relation.ConnectionSettingsOptions?.SourceSerializerFactory,
            relation.ConnectionSettingsOptions?.PropertyMappingProvider);

        relation.Action?.Invoke(settings);
        return settings;
    }
}
Oops, something went wrong.