Skip to content

Commit

Permalink
feat: add es
Browse files Browse the repository at this point in the history
* Elasticsearch init

* chore: add MasaElasticClient

* chore: Elasticsearch init

* chore: elasticsearch init

* chore: Improve MasaElasticClient

* chore: Add DocumentExistsAsync

* chore: adjust ElasticsearchRelations

* chore: Support document batch operation

* chore: adjust AddElasticsearch methods

* chore: add AddElasticsearchClient methods

* chore: adjust GetPaginatedListAsync methods

* chore: add alias

* chore:  add GetIndexByAliasAsync

* chore: adjust GetAliasByIndexAsync methods

* chore: Adjust using references

* chore: add pinyin Filter

* chore: adjust MASA.Utils.Data.Elasticsearch

* chore: change methods name

* chore: Adjustment parameters

* chore: replace default IndexName

* chore: add readonly

* chore: Optimize the AddElasticsearch method

* chore: Simplified parameter names

Co-authored-by: zhenlei520 <[email protected]>
  • Loading branch information
zhenlei520 and zhenlei520 authored Jan 21, 2022
1 parent e419ebf commit e45be56
Show file tree
Hide file tree
Showing 60 changed files with 1,807 additions and 0 deletions.
7 changes: 7 additions & 0 deletions MASA.Utils.sln
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "MASA.Utils.Caller.HttpClien
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "MASA.Utils.Caller.DaprClient", "src\Caller\MASA.Utils.Caller.DaprClient\MASA.Utils.Caller.DaprClient.csproj", "{68B51DE3-FEA2-4704-B1D0-B9924F754A76}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "MASA.Utils.Data.Elasticsearch", "src\Data\MASA.Utils.Data.Elasticsearch\MASA.Utils.Data.Elasticsearch.csproj", "{2FCFEA0A-146E-4F8F-ABCB-DE5A3553A263}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Expand Down Expand Up @@ -121,6 +123,10 @@ Global
{68B51DE3-FEA2-4704-B1D0-B9924F754A76}.Debug|Any CPU.Build.0 = Debug|Any CPU
{68B51DE3-FEA2-4704-B1D0-B9924F754A76}.Release|Any CPU.ActiveCfg = Release|Any CPU
{68B51DE3-FEA2-4704-B1D0-B9924F754A76}.Release|Any CPU.Build.0 = Release|Any CPU
{2FCFEA0A-146E-4F8F-ABCB-DE5A3553A263}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{2FCFEA0A-146E-4F8F-ABCB-DE5A3553A263}.Debug|Any CPU.Build.0 = Debug|Any CPU
{2FCFEA0A-146E-4F8F-ABCB-DE5A3553A263}.Release|Any CPU.ActiveCfg = Release|Any CPU
{2FCFEA0A-146E-4F8F-ABCB-DE5A3553A263}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
Expand All @@ -147,6 +153,7 @@ Global
{3BEAA614-6327-485F-A81D-3937EC7E16B5} = {72E67141-9CD8-48A9-B27B-F0C924FD4A12}
{C19B010B-AA4D-46C0-A229-034C3F36C266} = {72E67141-9CD8-48A9-B27B-F0C924FD4A12}
{68B51DE3-FEA2-4704-B1D0-B9924F754A76} = {72E67141-9CD8-48A9-B27B-F0C924FD4A12}
{2FCFEA0A-146E-4F8F-ABCB-DE5A3553A263} = {F844C2A1-C36D-400E-A0D8-7658EF9C3B93}
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {D7DAA0E6-098F-4B18-8775-64FDA96F1FF0}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
namespace MASA.Utils.Data.Elasticsearch.Analysis.TokenFilters;

/// <summary>
/// Settings contract for a pinyin token filter. Every property is exposed under the
/// snake_case key named in its DataMember attribute.
/// </summary>
public interface IPinYinTokenFilter : ITokenFilter
{
    /// <summary>
    /// When enabled, eg: 刘德华>ldh
    /// </summary>
    [DataMember(Name = "keep_first_letter")]
    bool KeepFirstLetter { get; set; }

    /// <summary>
    /// When enabled, the first letters are kept separately, eg: 刘德华>l,d,h (default: false).
    /// NOTE: query results may become too fuzzy because such short terms are very frequent.
    /// </summary>
    [DataMember(Name = "keep_separate_first_letter")]
    bool KeepSeparateFirstLetter { get; set; }

    /// <summary>
    /// Maximum length of the first_letter result.
    /// </summary>
    [DataMember(Name = "limit_first_letter_length")]
    int LimitFirstLetterLength { get; set; }

    /// <summary>
    /// When enabled, eg: 刘德华> [liu,de,hua]
    /// </summary>
    [DataMember(Name = "keep_full_pinyin")]
    bool KeepFullPinyin { get; set; }

    /// <summary>
    /// When enabled, eg: 刘德华> [liudehua]
    /// </summary>
    [DataMember(Name = "keep_joined_full_pinyin")]
    bool KeepJoinedFullPinyin { get; set; }

    /// <summary>
    /// Keep non-Chinese letters or numbers in the result.
    /// </summary>
    [DataMember(Name = "keep_none_chinese")]
    bool KeepNoneChinese { get; set; }

    /// <summary>
    /// Keep non-Chinese letters together, eg: DJ音乐家 -> DJ,yin,yue,jia;
    /// when set to false, eg: DJ音乐家 -> D,J,yin,yue,jia.
    /// NOTE: keep_none_chinese should be enabled first.
    /// </summary>
    [DataMember(Name = "keep_none_chinese_together")]
    bool KeepNoneChineseTogether { get; set; }

    /// <summary>
    /// Keep non-Chinese letters in the first-letter result, eg: 刘德华AT2016->ldhat2016
    /// </summary>
    [DataMember(Name = "keep_none_chinese_in_first_letter")]
    bool KeepNoneChineseInFirstLetter { get; set; }

    /// <summary>
    /// Keep non-Chinese letters in the joined full pinyin, eg: 刘德华2016->liudehua2016
    /// </summary>
    [DataMember(Name = "keep_none_chinese_in_joined_full_pinyin")]
    bool KeepNoneChineseInJoinedFullPinyin { get; set; }

    /// <summary>
    /// Break non-Chinese letters into separate pinyin terms if they are pinyin,
    /// eg: liudehuaalibaba13zhuanghan -> liu,de,hua,a,li,ba,ba,13,zhuang,han.
    /// NOTE: keep_none_chinese and keep_none_chinese_together should be enabled first.
    /// </summary>
    [DataMember(Name = "none_chinese_pinyin_tokenize")]
    bool NoneChinesePinyinTokenize { get; set; }

    /// <summary>
    /// When enabled, the original input is kept as well.
    /// </summary>
    [DataMember(Name = "keep_original")]
    bool KeepOriginal { get; set; }

    /// <summary>
    /// Lowercase non-Chinese letters.
    /// </summary>
    [DataMember(Name = "lowercase")]
    bool Lowercase { get; set; }

    /// <summary>
    /// Value for the trim_whitespace setting.
    /// NOTE(review): this member was undocumented; presumably it controls trimming of
    /// whitespace around terms - confirm against the pinyin plugin documentation.
    /// </summary>
    [DataMember(Name = "trim_whitespace")]
    bool TrimWhitespace { get; set; }

    /// <summary>
    /// When enabled, duplicated terms are removed to save index space, eg: de的>de.
    /// NOTE: position-related queries may be influenced.
    /// </summary>
    [DataMember(Name = "remove_duplicated_term")]
    bool RemoveDuplicatedTerm { get; set; }

    /// <summary>
    /// After 6.0, offsets are strictly constrained and overlapped tokens are not allowed.
    /// With this parameter, overlapped tokens are allowed by ignoring the offset.
    /// Please note that all position-related queries or highlighting will become incorrect;
    /// use multi fields with different settings for different query purposes.
    /// If you need offsets, set this to false.
    /// </summary>
    [DataMember(Name = "ignore_pinyin_offset")]
    bool IgnorePinyinOffset { get; set; }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
namespace MASA.Utils.Data.Elasticsearch.Analysis.TokenFilters;

/// <summary>
/// Descriptor for a token filter of type "pinyin". The defaults chosen by this descriptor
/// are expressed as property initializers; any property without an initializer starts
/// out as false.
/// </summary>
public class PinYinTokenFilterDescriptor
    : TokenFilterDescriptorBase<PinYinTokenFilterDescriptor, IPinYinTokenFilter>, IPinYinTokenFilter
{
    /// <summary>Filter type name reported by this descriptor.</summary>
    protected override string Type => "pinyin";

    /// <summary>
    /// When enabled, eg: 刘德华>ldh. This descriptor starts with it enabled.
    /// </summary>
    [DataMember(Name = "keep_first_letter")]
    public bool KeepFirstLetter { get; set; } = true;

    /// <summary>
    /// When enabled, the first letters are kept separately, eg: 刘德华>l,d,h (default: false).
    /// NOTE: query results may become too fuzzy because such short terms are very frequent.
    /// </summary>
    [DataMember(Name = "keep_separate_first_letter")]
    public bool KeepSeparateFirstLetter { get; set; }

    /// <summary>
    /// Maximum length of the first_letter result. This descriptor starts with 50.
    /// </summary>
    [DataMember(Name = "limit_first_letter_length")]
    public int LimitFirstLetterLength { get; set; } = 50;

    /// <summary>
    /// When enabled, eg: 刘德华> [liu,de,hua]. This descriptor starts with it enabled.
    /// </summary>
    [DataMember(Name = "keep_full_pinyin")]
    public bool KeepFullPinyin { get; set; } = true;

    /// <summary>
    /// When enabled, eg: 刘德华> [liudehua]. This descriptor starts with it enabled.
    /// </summary>
    [DataMember(Name = "keep_joined_full_pinyin")]
    public bool KeepJoinedFullPinyin { get; set; } = true;

    /// <summary>
    /// Keep non-Chinese letters or numbers in the result. This descriptor starts with it enabled.
    /// </summary>
    [DataMember(Name = "keep_none_chinese")]
    public bool KeepNoneChinese { get; set; } = true;

    /// <summary>
    /// Keep non-Chinese letters together, eg: DJ音乐家 -> DJ,yin,yue,jia;
    /// when set to false, eg: DJ音乐家 -> D,J,yin,yue,jia.
    /// NOTE: keep_none_chinese should be enabled first.
    /// This descriptor starts with it enabled.
    /// </summary>
    [DataMember(Name = "keep_none_chinese_together")]
    public bool KeepNoneChineseTogether { get; set; } = true;

    /// <summary>
    /// Keep non-Chinese letters in the first-letter result, eg: 刘德华AT2016->ldhat2016.
    /// This descriptor starts with it enabled.
    /// </summary>
    [DataMember(Name = "keep_none_chinese_in_first_letter")]
    public bool KeepNoneChineseInFirstLetter { get; set; } = true;

    /// <summary>
    /// Keep non-Chinese letters in the joined full pinyin, eg: 刘德华2016->liudehua2016.
    /// This descriptor starts with it enabled.
    /// </summary>
    [DataMember(Name = "keep_none_chinese_in_joined_full_pinyin")]
    public bool KeepNoneChineseInJoinedFullPinyin { get; set; } = true;

    /// <summary>
    /// Break non-Chinese letters into separate pinyin terms if they are pinyin,
    /// eg: liudehuaalibaba13zhuanghan -> liu,de,hua,a,li,ba,ba,13,zhuang,han.
    /// NOTE: keep_none_chinese and keep_none_chinese_together should be enabled first.
    /// This descriptor starts with it enabled.
    /// </summary>
    [DataMember(Name = "none_chinese_pinyin_tokenize")]
    public bool NoneChinesePinyinTokenize { get; set; } = true;

    /// <summary>
    /// When enabled, the original input is kept as well. This descriptor starts with it enabled.
    /// </summary>
    [DataMember(Name = "keep_original")]
    public bool KeepOriginal { get; set; } = true;

    /// <summary>
    /// Lowercase non-Chinese letters. This descriptor starts with it enabled.
    /// </summary>
    [DataMember(Name = "lowercase")]
    public bool Lowercase { get; set; } = true;

    /// <summary>
    /// Value for the trim_whitespace setting; not given an initial value here, so it starts as false.
    /// NOTE(review): this member was undocumented; presumably it controls trimming of
    /// whitespace around terms - confirm against the pinyin plugin documentation.
    /// </summary>
    [DataMember(Name = "trim_whitespace")]
    public bool TrimWhitespace { get; set; }

    /// <summary>
    /// When enabled, duplicated terms are removed to save index space, eg: de的>de.
    /// NOTE: position-related queries may be influenced.
    /// This descriptor starts with it enabled.
    /// </summary>
    [DataMember(Name = "remove_duplicated_term")]
    public bool RemoveDuplicatedTerm { get; set; } = true;

    /// <summary>
    /// After 6.0, offsets are strictly constrained and overlapped tokens are not allowed.
    /// With this parameter, overlapped tokens are allowed by ignoring the offset.
    /// Please note that all position-related queries or highlighting will become incorrect;
    /// use multi fields with different settings for different query purposes.
    /// If you need offsets, set this to false. Not given an initial value here, so it starts as false.
    /// </summary>
    [DataMember(Name = "ignore_pinyin_offset")]
    public bool IgnorePinyinOffset { get; set; }
}
6 changes: 6 additions & 0 deletions src/Data/MASA.Utils.Data.Elasticsearch/Const.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
namespace MASA.Utils.Data.Elasticsearch;

/// <summary>
/// Constants shared by the Elasticsearch utilities.
/// </summary>
public class Const
{
    /// <summary>
    /// Client name constant with the value "es".
    /// NOTE(review): presumably the name used when a client is registered without an explicit name - confirm at the call sites.
    /// </summary>
    public const string DEFAULT_CLIENT_NAME = "es";
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
namespace MASA.Utils.Data.Elasticsearch;

/// <summary>
/// Default <see cref="IElasticsearchFactory"/>. Looks up a configured relation by name
/// (or picks the default one) and returns a client built from it. The underlying
/// <see cref="IElasticClient"/> is cached per relation name.
/// </summary>
public class DefaultElasticsearchFactory : IElasticsearchFactory
{
    private readonly Dictionary<string, ElasticsearchRelations> _relations;
    private readonly ConcurrentDictionary<string, IElasticClient> _elasticClients;

    /// <summary>
    /// Creates the factory over the relations carried by <paramref name="options"/>.
    /// </summary>
    /// <param name="options">Options whose <c>Relations</c> dictionary maps a name to its relation.</param>
    public DefaultElasticsearchFactory(ElasticsearchRelationsOptions options)
    {
        _relations = options.Relations;
        _elasticClients = new();
    }

    /// <summary>
    /// Wraps the default <see cref="IElasticClient"/> in a new <see cref="IMasaElasticClient"/>.
    /// </summary>
    public IMasaElasticClient CreateClient()
    {
        return new DefaultMasaElasticClient(CreateElasticClient());
    }

    /// <summary>
    /// Wraps the <see cref="IElasticClient"/> registered under <paramref name="name"/>
    /// in a new <see cref="IMasaElasticClient"/>.
    /// </summary>
    /// <param name="name">Name of a configured relation.</param>
    public IMasaElasticClient CreateClient(string name)
    {
        return new DefaultMasaElasticClient(CreateElasticClient(name));
    }

    /// <summary>
    /// Returns the client for the relation flagged as default; when no relation is flagged,
    /// the first relation enumerated from the dictionary is used instead.
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// No relation is configured at all, or more than one relation is flagged as default
    /// (the latter is raised by <c>SingleOrDefault</c>).
    /// </exception>
    public IElasticClient CreateElasticClient()
    {
        var elasticsearchRelation = _relations.Values.SingleOrDefault(r => r.IsDefault) ?? _relations.Values.FirstOrDefault();

        if (elasticsearchRelation == null)
        {
            // A specific exception type instead of the bare System.Exception; still caught by `catch (Exception)`.
            throw new InvalidOperationException("The default ElasticClient is not found, please check if Elasticsearch is added");
        }

        return GetOrAddElasticClient(elasticsearchRelation.Name);
    }

    /// <summary>
    /// Returns the client for the relation registered under <paramref name="name"/>.
    /// </summary>
    /// <param name="name">Name of a configured relation.</param>
    /// <exception cref="NotSupportedException">No relation is registered under <paramref name="name"/>.</exception>
    public IElasticClient CreateElasticClient(string name)
    {
        if (!_relations.ContainsKey(name))
        {
            throw new NotSupportedException($"The ElasticClient whose name is {name} is not found");
        }

        return GetOrAddElasticClient(name);
    }

    // Returns the cached client for the name, building it on first use.
    // Note: ConcurrentDictionary.GetOrAdd may run the factory more than once under
    // contention; only one of the produced clients is stored and returned to callers.
    private IElasticClient GetOrAddElasticClient(string name)
        => _elasticClients.GetOrAdd(name, Create);

    // Builds a new client from the relation registered under the given name.
    private IElasticClient Create(string name)
    {
        var relation = _relations[name];

        var settings = relation.UseConnectionPool
            ? GetConnectionSettingsConnectionPool(relation)
            : GetConnectionSettingsBySingleNode(relation);

        return new ElasticClient(settings);
    }

    // Connection settings that target only the first configured node.
    private ConnectionSettings GetConnectionSettingsBySingleNode(ElasticsearchRelations relation)
    {
        var settings = new ConnectionSettings(relation.Nodes[0]);

        // Apply the user-supplied customization here as well; previously it was honored
        // only on the connection-pool path, so single-node setups silently lost it.
        relation.Action?.Invoke(settings);
        return settings;
    }

    // Connection settings backed by a static pool over every configured node.
    private ConnectionSettings GetConnectionSettingsConnectionPool(ElasticsearchRelations relation)
    {
        var pool = new StaticConnectionPool(
            relation.Nodes,
            relation.StaticConnectionPoolOptions?.Randomize ?? true,
            relation.StaticConnectionPoolOptions?.DateTimeProvider);

        var settings = new ConnectionSettings(
            pool,
            relation.ConnectionSettingsOptions?.Connection,
            relation.ConnectionSettingsOptions?.SourceSerializerFactory,
            relation.ConnectionSettingsOptions?.PropertyMappingProvider);

        relation.Action?.Invoke(settings);
        return settings;
    }
}
Loading

0 comments on commit e45be56

Please sign in to comment.