Skip to content

Commit

Permalink
feature: add RemoveWord api
Browse files Browse the repository at this point in the history
  • Loading branch information
AydenZ committed Aug 24, 2022
1 parent 4b465a2 commit bf251c0
Show file tree
Hide file tree
Showing 6 changed files with 51 additions and 1 deletion.
9 changes: 9 additions & 0 deletions deps/cppjieba/DictTrie.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,15 @@ class DictTrie {
return true;
}

bool DeleteUserWord(const string& word, const string& tag = UNKNOWN_TAG) {
DictUnit node_info;
if (!MakeNodeInfo(node_info, word, user_word_default_weight_, tag)) {
return false;
}
trie_->DeleteNode(node_info.word, &node_info);
return true;
}

// Looks up the rune range [begin, end) in the underlying trie and returns
// whatever the trie's Find reports for it.
const DictUnit* Find(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end) const {
  const DictUnit* match = trie_->Find(begin, end);
  return match;
}
Expand Down
4 changes: 4 additions & 0 deletions deps/cppjieba/Jieba.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,10 @@ class Jieba {
return dict_trie_.InsertUserWord(word,freq, tag);
}

// Removes a user word by delegating to the dictionary trie's DeleteUserWord
// and returns that call's result unchanged.
bool DeleteUserWord(const string& word, const string& tag = UNKNOWN_TAG) {
  const bool removed = dict_trie_.DeleteUserWord(word, tag);
  return removed;
}

bool Find(const string& word)
{
return dict_trie_.Find(word);
Expand Down
28 changes: 27 additions & 1 deletion deps/cppjieba/Trie.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,33 @@ class Trie {
assert(ptNode != NULL);
ptNode->ptValue = ptValue;
}

void DeleteNode(const Unicode& key, const DictUnit* ptValue) {
if (key.begin() == key.end()) {
return;
}
//定义一个NextMap迭代器
TrieNode::NextMap::const_iterator kmIter;
//定义一个指向root的TrieNode指针
TrieNode *ptNode = root_;
for (Unicode::const_iterator citer = key.begin(); citer != key.end(); ++citer) {
//链表不存在元素
if (NULL == ptNode->next) {
return;
}
kmIter = ptNode->next->find(*citer);
//如果map中不存在,跳出循环
if (ptNode->next->end() == kmIter) {
break;
}
//从unordered_map中擦除该项
ptNode->next->erase(*citer);
//删除该node
ptNode = kmIter->second;
delete ptNode;
break;
}
return;
}
private:
void CreateTrie(const vector<Unicode>& keys, const vector<const DictUnit*>& valuePointers) {
if (valuePointers.empty() || keys.empty()) {
Expand Down
4 changes: 4 additions & 0 deletions jieba.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,10 @@ void AddWord(Jieba x, const char* word) {
((cppjieba::Jieba*)x)->InsertUserWord(word);
}

// C entry point: removes `word` from the segmenter behind the opaque handle
// `x` by calling DeleteUserWord on it. The boolean result is discarded.
void RemoveWord(Jieba x, const char* word) {
  cppjieba::Jieba* segmenter = (cppjieba::Jieba*)x;
  segmenter->DeleteUserWord(word);
}

Word* Tokenize(Jieba x, const char* sentence, TokenizeMode mode, int is_hmm_used) {
std::vector<cppjieba::Word> words;
switch (mode) {
Expand Down
6 changes: 6 additions & 0 deletions jieba.go
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,12 @@ func (x *Jieba) AddWord(s string) {
C.AddWord(x.jieba, cstr)
}

// RemoveWord removes s from the segmenter's dictionary by calling the C
// RemoveWord function. The temporary C copy of s is freed on return.
func (x *Jieba) RemoveWord(s string) {
	cWord := C.CString(s)
	defer C.free(unsafe.Pointer(cWord))

	C.RemoveWord(x.jieba, cWord)
}

func (x *Jieba) Tokenize(s string, mode TokenizeMode, hmm bool) []Word {
c_int_hmm := 0
if hmm {
Expand Down
1 change: 1 addition & 0 deletions jieba.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ char** CutAll(Jieba handle, const char* sentence);
/* NOTE(review): the implementations of CutForSearch and Tag are not visible
 * here; ownership and termination of the returned char** should be confirmed
 * against jieba.cpp before documenting them further. */
char** CutForSearch(Jieba handle, const char* sentence, int is_hmm_used);
char** Tag(Jieba handle, const char* sentence);
/* Adds `word` to the segmenter behind `handle`; jieba.cpp forwards this to
 * cppjieba::Jieba::InsertUserWord. */
void AddWord(Jieba handle, const char* word);
/* Removes `word` from the segmenter behind `handle`; jieba.cpp forwards this
 * to cppjieba::Jieba::DeleteUserWord and discards its boolean result. */
void RemoveWord(Jieba handle, const char* word);

/* Tokenizes `sentence` according to `mode`.
 * NOTE(review): layout and ownership of the returned Word* are not visible
 * here; confirm against the implementation. */
Word* Tokenize(Jieba x, const char* sentence, TokenizeMode mode, int is_hmm_used);

Expand Down

0 comments on commit bf251c0

Please sign in to comment.