Skip to content

Commit

Permalink
加入暗黑模式
Browse files Browse the repository at this point in the history
  • Loading branch information
newpanjing committed May 12, 2022
1 parent e0d1f76 commit d8c9949
Show file tree
Hide file tree
Showing 15 changed files with 428 additions and 245 deletions.
6 changes: 2 additions & 4 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,10 +73,8 @@ func initContainer(args Args, tokenizer *words.Tokenizer) *searcher.Container {
Tokenizer: tokenizer,
Shard: args.Shard,
}
err := container.Init()
if err != nil {
panic(err)
}
go container.Init()

return container
}

Expand Down
46 changes: 26 additions & 20 deletions searcher/engine.go
Original file line number Diff line number Diff line change
Expand Up @@ -316,6 +316,7 @@ func (e *Engine) MultiSearch(request *model.SearchRequest) *model.SearchResult {

fastSort := &sorts.FastSort{
IsDebug: e.IsDebug,
Order: request.Order,
}

_time := utils.ExecTime(func() {
Expand All @@ -336,6 +337,14 @@ func (e *Engine) MultiSearch(request *model.SearchRequest) *model.SearchResult {
// 处理分页
request = request.GetAndSetDefault()

//计算交集得分和去重
fastSort.Process()

wordMap := make(map[string]bool)
for _, word := range words {
wordMap[word] = true
}

//读取文档
var result = &model.SearchResult{
Total: fastSort.Count(),
Expand All @@ -347,14 +356,6 @@ func (e *Engine) MultiSearch(request *model.SearchRequest) *model.SearchResult {
_time += utils.ExecTime(func() {

pager := new(pagination.Pagination)
var resultItems []model.SliceItem
_tt := utils.ExecTime(func() {
resultItems = fastSort.GetAll(request.Order)
})

if e.IsDebug {
log.Println("处理排序耗时", _tt, "ms")
}

pager.Init(request.Limit, fastSort.Count())
//设置总页数
Expand All @@ -364,15 +365,18 @@ func (e *Engine) MultiSearch(request *model.SearchRequest) *model.SearchResult {
if pager.PageCount != 0 {

start, end := pager.GetPage(request.Page)
items := resultItems[start:end]
count := len(items)

var resultItems = make([]model.SliceItem, 0)
fastSort.GetAll(&resultItems, start, end)

count := len(resultItems)

result.Documents = make([]model.ResponseDoc, count)
//只读取前面100个
wg := new(sync.WaitGroup)
wg.Add(count)
for index, item := range items {

go e.getDocument(item, &result.Documents[index], request, &words, wg)
for index, item := range resultItems {
go e.getDocument(item, &result.Documents[index], request, &wordMap, wg)
}
wg.Wait()
}
Expand All @@ -385,7 +389,7 @@ func (e *Engine) MultiSearch(request *model.SearchRequest) *model.SearchResult {
return result
}

func (e *Engine) getDocument(item model.SliceItem, doc *model.ResponseDoc, request *model.SearchRequest, words *[]string, wg *sync.WaitGroup) {
func (e *Engine) getDocument(item model.SliceItem, doc *model.ResponseDoc, request *model.SearchRequest, wordMap *map[string]bool, wg *sync.WaitGroup) {
buf := e.GetDocById(item.Id)
defer wg.Done()
doc.Score = item.Score
Expand All @@ -395,15 +399,18 @@ func (e *Engine) getDocument(item model.SliceItem, doc *model.ResponseDoc, reque
storageDoc := new(model.StorageIndexDoc)
utils.Decoder(buf, &storageDoc)
doc.Document = storageDoc.Document
doc.Keys = storageDoc.Keys
text := storageDoc.Text
//处理关键词高亮
highlight := request.Highlight
if highlight != nil {
//全部小写
text = strings.ToLower(text)
//还可以优化,只替换击中的词
for _, word := range *words {
text = strings.ReplaceAll(text, word, fmt.Sprintf("%s%s%s", highlight.PreTag, word, highlight.PostTag))
for _, key := range storageDoc.Keys {
if ok := (*wordMap)[key]; ok {
text = strings.ReplaceAll(text, key, fmt.Sprintf("%s%s%s", highlight.PreTag, key, highlight.PostTag))
}
}
}
doc.Text = text
Expand All @@ -426,9 +433,7 @@ func (e *Engine) processKeySearch(word string, fastSort *sorts.FastSort, wg *syn
ids := make([]uint32, 0)
//解码
utils.Decoder(buf, &ids)
//ids越多,说明这个词频越高,这个词越重要
frequency := (len(ids) % base) + 1
fastSort.Add(ids, frequency)
fastSort.Add(&ids)
}

}
Expand Down Expand Up @@ -523,10 +528,11 @@ func (e *Engine) Drop() error {
return err
}
for _, d := range dir {
err := os.RemoveAll(path.Join([]string{e.IndexPath, d.Name()}...))
err := os.RemoveAll(path.Join([]string{d.Name()}...))
if err != nil {
return err
}
os.Remove(e.IndexPath)
}

//清空内存
Expand Down
3 changes: 2 additions & 1 deletion searcher/model/doc.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@ type StorageIndexDoc struct {

type ResponseDoc struct {
IndexDoc
Score int `json:"score,omitempty"` //得分
Score int `json:"score,omitempty"` //得分
Keys []string `json:"keys,omitempty"`
}

type RemoveIndexModel struct {
Expand Down
92 changes: 64 additions & 28 deletions searcher/sorts/fast.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package sorts
import (
"gofound/searcher/model"
"sort"
"strings"
"sync"
)

Expand All @@ -22,13 +23,13 @@ func (x ScoreSlice) Swap(i, j int) {
x[i], x[j] = x[j], x[i]
}

type SortSlice []model.SliceItem
type SortSlice []uint32

// Len returns the number of elements in x. Together with Less and Swap it
// lets a SortSlice be handed to sort.Sort.
func (x SortSlice) Len() int {
return len(x)
}
func (x SortSlice) Less(i, j int) bool {
return x[i].Id < x[j].Id
return x[i] < x[j]
}
func (x SortSlice) Swap(i, j int) {
x[i], x[j] = x[j], x[i]
Expand All @@ -48,58 +49,93 @@ type FastSort struct {

data []model.SliceItem

count int //总数
}

func (f *FastSort) Add(ids []uint32, frequency int) {
f.Lock()
defer f.Unlock()
temps []uint32

for _, id := range ids {
count int //总数

found, index := find(f.data, id)
if found {
f.data[index].Score += 1
} else {
Order string //排序方式
}

f.data = append(f.data, model.SliceItem{
Id: id,
Score: 1,
})
}
}
f.count = len(f.data)
func (f *FastSort) Add(ids *[]uint32) {
//f.Lock()
//defer f.Unlock()

//for _, id := range *ids {
//
// found, index := f.find(&id)
// if found {
// f.data[index].Score += 1
// } else {
//
// f.data = append(f.data, model.SliceItem{
// Id: id,
// Score: 1,
// })
// f.Sort()
// }
//}
//f.count = len(f.data)
f.temps = append(f.temps, *ids...)
}

// 二分法查找
func find(data []model.SliceItem, target uint32) (bool, int) {
func (f *FastSort) find(target *uint32) (bool, int) {

low := 0
high := len(data) - 1
high := f.count - 1
for low <= high {
mid := (low + high) / 2
if data[mid].Id == target {
if f.data[mid].Id == *target {
return true, mid
} else if data[mid].Id < target {
} else if f.data[mid].Id < *target {
high = mid - 1
} else {
low = mid + 1
}
}
return false, -1
//for index, item := range f.data {
// if item.Id == *target {
// return true, index
// }
//}
//return false, -1
}

// Count returns the total number of items recorded in the count field.
func (f *FastSort) Count() int {
return f.count
}

func (f *FastSort) GetAll(order string) []model.SliceItem {
// Sort orders the buffered ids in f.temps in place. When the lower-cased
// Order equals DESC the SortSlice ordering is reversed; otherwise the plain
// SortSlice ordering is used.
func (f *FastSort) Sort() {
	var ordering sort.Interface = SortSlice(f.temps)
	if strings.ToLower(f.Order) == DESC {
		ordering = sort.Reverse(ordering)
	}
	sort.Sort(ordering)
}

// Process 处理数据
func (f *FastSort) Process() {
//计算重复
f.Sort()

if order == DESC {
sort.Sort(sort.Reverse(SortSlice(f.data)))
for _, temp := range f.temps {
if found, index := f.find(&temp); found {
f.data[index].Score += 1
} else {
f.data = append(f.data, model.SliceItem{
Id: temp,
Score: 1,
})
f.count++
}
}
//对分数进行排序
sort.Sort(sort.Reverse(ScoreSlice(f.data)))
}
func (f *FastSort) GetAll(result *[]model.SliceItem, start int, end int) {

return f.data
*result = f.data[start:end]
}
37 changes: 36 additions & 1 deletion searcher/utils/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ import (
"log"
"os"
"path/filepath"
"regexp"
"strings"
"time"
)

Expand Down Expand Up @@ -277,7 +279,7 @@ func ReleaseAssets(file fs.File, out string) {

}

// DirSizeMB getFileSize get file size by path(B)
// DirSizeB gets the size, in bytes (B), of the file or directory tree at path.
func DirSizeB(path string) int64 {
var size int64
filepath.Walk(path, func(_ string, info os.FileInfo, err error) error {
Expand Down Expand Up @@ -307,3 +309,36 @@ func exists(path string) bool {
_, err := os.Stat(path)
return err == nil || os.IsExist(err)
}

// punctuationRe matches one or more consecutive characters of the Unicode
// punctuation category (\p{P}). It is compiled once at package
// initialisation rather than on every RemovePunctuation call.
var punctuationRe = regexp.MustCompile(`\p{P}+`)

// RemovePunctuation returns str with every Unicode punctuation character
// removed. Characters outside the P category, such as the symbols "$" and
// "+", as well as whitespace, are left untouched.
func RemovePunctuation(str string) string {
	return punctuationRe.ReplaceAllString(str, "")
}

// spaceRe matches one or more consecutive whitespace characters. In Go's
// regexp syntax \s is ASCII-only ([\t\n\f\r ]), so characters such as the
// ideographic space U+3000 are not matched. It is compiled once at package
// initialisation rather than on every RemoveSpace call.
var spaceRe = regexp.MustCompile(`\s+`)

// RemoveSpace returns str with every ASCII whitespace character (space, tab,
// newline, form feed, carriage return) removed.
func RemoveSpace(str string) string {
	return spaceRe.ReplaceAllString(str, "")
}

// contains reports whether any element of *s, other than the one at
// skipIndex, has e as a substring.
func contains(s *[]string, e string, skipIndex int) bool {
	candidates := *s
	for i := 0; i < len(candidates); i++ {
		if i == skipIndex {
			continue
		}
		if strings.Contains(candidates[i], e) {
			return true
		}
	}
	return false
}

// GetLongWords returns the "long" words of *words: every word that is not a
// substring of some other, different word in the list. Input order is
// preserved and the result is always a non-nil slice.
//
// Duplicate words are collapsed to their first occurrence. Without that rule
// two identical words would each count as a substring of the other and both
// would be dropped, losing the word entirely.
func GetLongWords(words *[]string) []string {
	list := *words
	longWords := make([]string, 0, len(list))

outer:
	for i, w := range list {
		for j, other := range list {
			if i == j {
				continue
			}
			if other == w {
				// Identical word: keep only the first occurrence.
				if j < i {
					continue outer
				}
				continue
			}
			// w is a proper substring of a longer word, so it is not "long".
			if strings.Contains(other, w) {
				continue outer
			}
		}
		longWords = append(longWords, w)
	}
	return longWords
}
4 changes: 4 additions & 0 deletions searcher/words/tokenizer.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,10 @@ func NewTokenizer(dictionaryPath string) *Tokenizer {
func (t *Tokenizer) Cut(text string) []string {
//不区分大小写
text = strings.ToLower(text)
//移除所有的标点符号
text = utils.RemovePunctuation(text)
//移除所有的空格
text = utils.RemoveSpace(text)

var wordMap = make(map[string]int)

Expand Down
6 changes: 0 additions & 6 deletions tests/http/index.http
Original file line number Diff line number Diff line change
Expand Up @@ -65,9 +65,3 @@ Content-Type: application/json
"number": 223
}
}

###
GET localhost:5678/api/dump
Accept: application/json

###
Loading

0 comments on commit d8c9949

Please sign in to comment.