Skip to content

Commit

Permalink
修改分词逻辑,重命名依赖包
Browse files Browse the repository at this point in the history
  • Loading branch information
linzedong committed Dec 25, 2023
1 parent 669549a commit 7a219b2
Show file tree
Hide file tree
Showing 33 changed files with 88 additions and 66 deletions.
10 changes: 5 additions & 5 deletions core/initialize.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,11 @@ package core
import (
"context"
"fmt"
"github.com/sea-team/gofound/global"
"github.com/sea-team/gofound/searcher"
"github.com/sea-team/gofound/searcher/words"
"github.com/sea-team/gofound/web/controller"
"github.com/sea-team/gofound/web/router"
"github.com/linzedo/gofound/global"
"github.com/linzedo/gofound/searcher"
"github.com/linzedo/gofound/searcher/words"
"github.com/linzedo/gofound/web/controller"
"github.com/linzedo/gofound/web/router"
"log"
"net/http"

Expand Down
2 changes: 1 addition & 1 deletion core/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ package core
import (
"flag"
"fmt"
"github.com/sea-team/gofound/global"
"github.com/linzedo/gofound/global"
"os"
"runtime"

Expand Down
2 changes: 1 addition & 1 deletion global/global.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
package global

import (
"github.com/sea-team/gofound/searcher"
"github.com/linzedo/gofound/searcher"
)

var (
Expand Down
4 changes: 2 additions & 2 deletions main.go
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
package main

import (
"github.com/sea-team/gofound/core"
"github.com/linzedo/gofound/core"
)

// main is the program entry point; it delegates all start-up work to
// core.Initialize.
//
// NOTE(review): the two core.Initialize calls below look like the removed and
// added lines of a diff rendered without +/- markers (empty path before,
// "./config/config.yaml" after) — presumably only one of them exists in the
// real file; confirm before relying on this listing.
func main() {
// Initialize the container and parse the arguments.
core.Initialize("")
core.Initialize("./config/config.yaml")
}
4 changes: 2 additions & 2 deletions sdk/base.go
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
package gofound

import (
"github.com/sea-team/gofound/searcher/model"
"github.com/sea-team/gofound/searcher/system"
"github.com/linzedo/gofound/searcher/model"
"github.com/linzedo/gofound/searcher/system"
"runtime"
)

Expand Down
6 changes: 3 additions & 3 deletions sdk/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@ package gofound

import (
"fmt"
"github.com/sea-team/gofound/core"
"github.com/sea-team/gofound/global"
"github.com/sea-team/gofound/searcher"
"github.com/linzedo/gofound/core"
"github.com/linzedo/gofound/global"
"github.com/linzedo/gofound/searcher"
"os"
"runtime"
"sync"
Expand Down
2 changes: 1 addition & 1 deletion sdk/database.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
package gofound

import (
"github.com/sea-team/gofound/searcher"
"github.com/linzedo/gofound/searcher"

"github.com/syndtr/goleveldb/leveldb/errors"
)
Expand Down
2 changes: 1 addition & 1 deletion sdk/index.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ package gofound

import (
"errors"
"github.com/sea-team/gofound/searcher/model"
"github.com/linzedo/gofound/searcher/model"
)

// AddIndex 添加索引
Expand Down
2 changes: 1 addition & 1 deletion searcher/container.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ package searcher
import (
"errors"
"fmt"
"github.com/sea-team/gofound/searcher/words"
"github.com/linzedo/gofound/searcher/words"
"log"
"os"
"runtime"
Expand Down
23 changes: 15 additions & 8 deletions searcher/engine.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@ package searcher

import (
"fmt"
"github.com/sea-team/gofound/searcher/arrays"
"github.com/sea-team/gofound/searcher/model"
"github.com/sea-team/gofound/searcher/pagination"
"github.com/sea-team/gofound/searcher/sorts"
"github.com/sea-team/gofound/searcher/storage"
"github.com/sea-team/gofound/searcher/utils"
"github.com/sea-team/gofound/searcher/words"
"github.com/linzedo/gofound/searcher/arrays"
"github.com/linzedo/gofound/searcher/model"
"github.com/linzedo/gofound/searcher/pagination"
"github.com/linzedo/gofound/searcher/sorts"
"github.com/linzedo/gofound/searcher/storage"
"github.com/linzedo/gofound/searcher/utils"
"github.com/linzedo/gofound/searcher/words"
"log"
"os"
"runtime"
Expand Down Expand Up @@ -352,9 +352,16 @@ func (e *Engine) addPositiveIndex(index *model.IndexDoc, keys []string) {
func (e *Engine) MultiSearch(request *model.SearchRequest) (*model.SearchResult, error) {
//等待搜索初始化完成
e.Wait()
var (
words []string
)

//分词搜索
words := e.Tokenizer.Cut(request.Query)
if request.Query == "" {
words = []string{""}
} else {
words = e.Tokenizer.Cut(request.Query)[1:]
}

fastSort := &sorts.FastSort{
IsDebug: e.IsDebug,
Expand Down
2 changes: 1 addition & 1 deletion searcher/sorts/fast.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
package sorts

import (
"github.com/sea-team/gofound/searcher/model"
"github.com/linzedo/gofound/searcher/model"
"sort"
"strings"
"sync"
Expand Down
2 changes: 1 addition & 1 deletion searcher/sorts/sort.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ package sorts

import (
"github.com/emirpasic/gods/trees/avltree"
"github.com/sea-team/gofound/searcher/utils"
"github.com/linzedo/gofound/searcher/utils"
"log"
"sync"
)
Expand Down
20 changes: 16 additions & 4 deletions searcher/utils/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -223,15 +223,27 @@ func DirSizeB(path string) int64 {
}

// regPunctuation matches a run of one or more Unicode punctuation characters
// (\p{P}). It is compiled once at package scope so callers do not recompile it.
var regPunctuation = regexp.MustCompile(`\p{P}+`)

// RemovePunctuation returns str with every punctuation character removed.
//
// NOTE(review): the body below appears to interleave the removed and added
// lines of a diff (a per-call regexp.MustCompile versus the shared
// regPunctuation); as written the first return wins and the lines after it
// are unreachable — confirm which version the real file contains.
func RemovePunctuation(str string) string {
reg := regexp.MustCompile(`\p{P}+`)
return reg.ReplaceAllString(str, "")
//reg := regexp.MustCompile(`\p{P}+`)
return regPunctuation.ReplaceAllString(str, "")
}

// regOnlyPunctuation matches text that consists solely of Unicode punctuation
// characters (\p{P}); the anchors force the whole string to match.
var regOnlyPunctuation = regexp.MustCompile(`^\p{P}+$`)

// IsPunctuation reports whether str is made up entirely of punctuation.
//
// It returns false for the empty string and for any string that mixes
// punctuation with other characters (for example "3.14" or "don't"), so a
// caller that filters tokens with it does not discard real words that merely
// contain a punctuation mark.
func IsPunctuation(str string) bool {
	return regOnlyPunctuation.MatchString(str)
}

// regSpace matches a run of one or more whitespace characters (\s). It is
// compiled once at package scope so callers do not recompile it.
var regSpace = regexp.MustCompile(`\s+`)

// RemoveSpace returns str with every whitespace character removed.
//
// NOTE(review): the body below appears to interleave the removed and added
// lines of a diff (a per-call regexp.MustCompile versus the shared regSpace);
// as written the first return wins and the lines after it are unreachable —
// confirm which version the real file contains.
func RemoveSpace(str string) string {
reg := regexp.MustCompile(`\s+`)
return reg.ReplaceAllString(str, "")
//reg := regexp.MustCompile(`\s+`)
return regSpace.ReplaceAllString(str, "")
}

// regOnlySpace matches text that consists solely of whitespace characters
// (\s); the anchors force the whole string to match.
var regOnlySpace = regexp.MustCompile(`^\s+$`)

// IsSpace reports whether str is made up entirely of whitespace.
//
// It returns false for the empty string and for any string that contains a
// non-whitespace character (for example "a b"), so a caller that filters
// tokens with it only drops tokens that are pure whitespace.
func IsSpace(str string) bool {
	return regOnlySpace.MatchString(str)
}

// init 注册数据类型
Expand Down
7 changes: 5 additions & 2 deletions searcher/words/tokenizer.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ package words

import (
"embed"
"github.com/sea-team/gofound/searcher/utils"
"github.com/linzedo/gofound/searcher/utils"
"strings"

"github.com/wangbin/jiebago"
Expand Down Expand Up @@ -45,12 +45,15 @@ func (t *Tokenizer) Cut(text string) []string {
var wordMap = make(map[string]struct{})

resultChan := t.seg.CutForSearch(text, true)
var wordsSlice []string
var wordsSlice = []string{""}
for {
w, ok := <-resultChan
if !ok {
break
}
if utils.IsSpace(w) || utils.IsPunctuation(w) {
continue
}
_, found := wordMap[w]
if !found {
//去除重复的词
Expand Down
2 changes: 1 addition & 1 deletion tests/benchmark/array_test.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
package benchmark

import (
"github.com/sea-team/gofound/searcher/arrays"
"github.com/linzedo/gofound/searcher/arrays"
"testing"
)

Expand Down
2 changes: 1 addition & 1 deletion tests/benchmark/skiplist_test.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
package benchmark

import (
"github.com/sea-team/gofound/searcher/arrays"
"github.com/linzedo/gofound/searcher/arrays"
"math/rand"
"testing"
)
Expand Down
8 changes: 4 additions & 4 deletions tests/index_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@ package tests
import (
"bufio"
"fmt"
"github.com/sea-team/gofound/searcher"
"github.com/sea-team/gofound/searcher/model"
"github.com/sea-team/gofound/searcher/utils"
"github.com/sea-team/gofound/searcher/words"
"github.com/linzedo/gofound/searcher"
"github.com/linzedo/gofound/searcher/model"
"github.com/linzedo/gofound/searcher/utils"
"github.com/linzedo/gofound/searcher/words"
"os"
"strings"
"testing"
Expand Down
2 changes: 1 addition & 1 deletion tests/sort_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ package tests

import (
"fmt"
"github.com/sea-team/gofound/searcher/utils"
"github.com/linzedo/gofound/searcher/utils"
"math/rand"
"sort"
"testing"
Expand Down
2 changes: 1 addition & 1 deletion web/admin/admin.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ package admin

import (
"github.com/gin-gonic/gin"
"github.com/sea-team/gofound/web/admin/assets"
"github.com/linzedo/gofound/web/admin/assets"
"net/http"
"net/url"
"os"
Expand Down
2 changes: 1 addition & 1 deletion web/controller/base.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
package controller

import (
"github.com/sea-team/gofound/searcher/model"
"github.com/linzedo/gofound/searcher/model"

"github.com/gin-gonic/gin"
)
Expand Down
2 changes: 1 addition & 1 deletion web/controller/index.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
package controller

import (
"github.com/sea-team/gofound/searcher/model"
"github.com/linzedo/gofound/searcher/model"
"time"

"github.com/gin-gonic/gin"
Expand Down
2 changes: 1 addition & 1 deletion web/controller/services.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
package controller

import (
service2 "github.com/sea-team/gofound/web/service"
service2 "github.com/linzedo/gofound/web/service"
)

var srv *Services
Expand Down
2 changes: 1 addition & 1 deletion web/middleware/exception.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
package middleware

import (
"github.com/sea-team/gofound/web"
"github.com/linzedo/gofound/web"
"runtime/debug"

"github.com/gin-gonic/gin"
Expand Down
2 changes: 1 addition & 1 deletion web/router/base.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
package router

import (
"github.com/sea-team/gofound/web/controller"
"github.com/linzedo/gofound/web/controller"

"github.com/gin-gonic/gin"
)
Expand Down
2 changes: 1 addition & 1 deletion web/router/database.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
package router

import (
"github.com/sea-team/gofound/web/controller"
"github.com/linzedo/gofound/web/controller"

"github.com/gin-gonic/gin"
)
Expand Down
2 changes: 1 addition & 1 deletion web/router/index.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
package router

import (
"github.com/sea-team/gofound/web/controller"
"github.com/linzedo/gofound/web/controller"

"github.com/gin-gonic/gin"
)
Expand Down
6 changes: 3 additions & 3 deletions web/router/router.go
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
package router

import (
"github.com/sea-team/gofound/global"
"github.com/sea-team/gofound/web/admin"
"github.com/sea-team/gofound/web/middleware"
"github.com/linzedo/gofound/global"
"github.com/linzedo/gofound/web/admin"
"github.com/linzedo/gofound/web/middleware"
"io"
"log"
"mime"
Expand Down
2 changes: 1 addition & 1 deletion web/router/word.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
package router

import (
"github.com/sea-team/gofound/web/controller"
"github.com/linzedo/gofound/web/controller"

"github.com/gin-gonic/gin"
)
Expand Down
8 changes: 4 additions & 4 deletions web/service/base.go
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
package service

import (
"github.com/sea-team/gofound/global"
"github.com/sea-team/gofound/searcher"
"github.com/sea-team/gofound/searcher/model"
"github.com/sea-team/gofound/searcher/system"
"github.com/linzedo/gofound/global"
"github.com/linzedo/gofound/searcher"
"github.com/linzedo/gofound/searcher/model"
"github.com/linzedo/gofound/searcher/system"
"os"
"runtime"
)
Expand Down
4 changes: 2 additions & 2 deletions web/service/database.go
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
package service

import (
"github.com/sea-team/gofound/global"
"github.com/sea-team/gofound/searcher"
"github.com/linzedo/gofound/global"
"github.com/linzedo/gofound/searcher"
)

type Database struct {
Expand Down
6 changes: 3 additions & 3 deletions web/service/index.go
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
package service

import (
"github.com/sea-team/gofound/global"
"github.com/sea-team/gofound/searcher"
"github.com/sea-team/gofound/searcher/model"
"github.com/linzedo/gofound/global"
"github.com/linzedo/gofound/searcher"
"github.com/linzedo/gofound/searcher/model"
)

type Index struct {
Expand Down
Loading

0 comments on commit 7a219b2

Please sign in to comment.