Skip to content

Commit

Permalink
Merge pull request sea-team#11 from newpanjing/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
newpanjing authored May 16, 2022
2 parents 20c1f66 + 3ecb303 commit f50a0ae
Show file tree
Hide file tree
Showing 49 changed files with 3,299 additions and 979 deletions.
4 changes: 2 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,6 @@ gofound
/go.sum
/.idea/
/*/*.bin
/dist/
/cache
/tests/index
/tests/index
/data
1 change: 0 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@

## 技术栈

+ 平衡二叉查找树
+ 二分法查找
+ 快速排序法
+ 倒排索引
Expand Down
25 changes: 25 additions & 0 deletions config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#监听地址
addr: 0.0.0.0:5678

#数据目录
data: ./data
#词典目录
dictionary: ./data/dictionary.txt

#是否启用admin
enableAdmin: true

# GOMAXPROCS：可同时执行 Go 代码的最大 CPU 数
gomaxprocs: 4

# admin 用户名和密码
auth: admin:123456

# 接口是否开启压缩
enableGzip: true

# 数据库关闭超时时间
timeout: 600

# 分片数量
shard: 10
29 changes: 0 additions & 29 deletions config.yml

This file was deleted.

83 changes: 83 additions & 0 deletions core/initialize.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
package core

import (
"context"
"fmt"
"gofound/global"
"gofound/searcher"
"gofound/searcher/words"
"gofound/web/controller"
"gofound/web/router"
"log"
"net/http"
"os"
"os/signal"
"syscall"
"time"
)

// NewContainer builds a searcher.Container from the global configuration and
// the supplied tokenizer, then starts the container's Init method in its own
// goroutine and returns the container immediately.
//
// NOTE(review): Init runs asynchronously, so the returned container may not be
// fully initialized yet — confirm that callers tolerate this.
func NewContainer(tokenizer *words.Tokenizer) *searcher.Container {
	c := new(searcher.Container)
	c.Dir = global.CONFIG.Data
	c.Debug = global.CONFIG.Debug
	c.Tokenizer = tokenizer
	c.Shard = global.CONFIG.Shard
	c.Timeout = global.CONFIG.Timeout

	// Initialization is kicked off in the background; it is not awaited here.
	go c.Init()

	return c
}

// NewTokenizer creates a tokenizer from the dictionary file at dictionaryPath.
// It is a thin wrapper around words.NewTokenizer.
func NewTokenizer(dictionaryPath string) *words.Tokenizer {
	tokenizer := words.NewTokenizer(dictionaryPath)
	return tokenizer
}

// Initialize parses the configuration, builds the tokenizer and search
// container, registers the HTTP routes and serves requests until the process
// receives SIGINT/SIGTERM or the listener fails.
//
// On a termination signal the server is shut down gracefully with a 5 second
// deadline. If the listener cannot be started (or stops with an unexpected
// error), the error is logged and Initialize returns instead of blocking
// forever waiting for a signal.
//
// A panic raised after the configuration has been parsed is recovered and
// printed; Initialize then returns normally.
func Initialize() {
	global.CONFIG = Parser()

	defer func() {
		if r := recover(); r != nil {
			// %v formats any recovered value; %s would garble non-string, non-error values.
			fmt.Printf("panic: %v\n", r)
		}
	}()

	// Initialize the tokenizer and the search container.
	tokenizer := NewTokenizer(global.CONFIG.Dictionary)
	global.Container = NewContainer(tokenizer)

	// Initialize the business-logic services.
	controller.NewServices()

	// Register routes and configure the HTTP server.
	handler := router.SetupRouter()
	srv := &http.Server{
		Addr:    global.CONFIG.Addr,
		Handler: handler,
	}

	// Serve in a separate goroutine and report an unexpected listener error back
	// to this goroutine. The channel is buffered so the sender never blocks.
	serveErr := make(chan error, 1)
	go func() {
		if err := srv.ListenAndServe(); err != nil && err != http.ErrServerClosed {
			serveErr <- err
		}
	}()

	// Graceful shutdown: wait for a termination signal or a listener failure.
	quit := make(chan os.Signal, 1)
	signal.Notify(quit, syscall.SIGINT, syscall.SIGTERM)
	defer signal.Stop(quit)

	select {
	case <-quit:
	case err := <-serveErr:
		// Nothing is being served, so there is nothing to shut down gracefully.
		log.Println("listen:", err)
		return
	}
	log.Println("Shutdown Server ...")

	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	if err := srv.Shutdown(ctx); err != nil {
		log.Println("Server Shutdown:", err)
	}

	log.Println("Server exiting")
}
66 changes: 66 additions & 0 deletions core/parser.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
package core

import (
"flag"
"fmt"
"gofound/global"
"gopkg.in/yaml.v2"
"io/ioutil"
"os"
"runtime"
)

// Parser builds the server configuration from command-line flags or, when the
// -config flag is set, from a YAML file.
//
// If -config is non-empty, the file is the only source of settings: every
// other flag is ignored and keys missing from the file keep their zero values.
// Parser panics when the file cannot be read or cannot be decoded as YAML.
//
// Parser registers flags on the default flag set, so it must be called at most
// once per process.
func Parser() *global.Config {
	addr := flag.String("addr", "0.0.0.0:5678", "设置监听地址和端口")

	// Build the default data directory with the OS path separator (Windows compatibility).
	dir := fmt.Sprintf(".%sdata", string(os.PathSeparator))
	dataDir := flag.String("data", dir, "设置数据存储目录")

	debug := flag.Bool("debug", true, "设置是否开启调试模式")

	dictionaryPath := flag.String("dictionary", "./data/dictionary.txt", "设置词典路径")

	enableAdmin := flag.Bool("enableAdmin", true, "设置是否开启后台管理")

	gomaxprocs := flag.Int("gomaxprocs", runtime.NumCPU()*2, "设置GOMAXPROCS")

	// Without this flag Config.Shard stays 0 on the flag path and the
	// documented --shard option is rejected as undefined.
	shard := flag.Int("shard", 10, "文件分片数量")

	auth := flag.String("auth", "", "开启认证，例如: admin:123456")

	enableGzip := flag.Bool("enableGzip", true, "是否开启gzip压缩")
	timeout := flag.Int64("timeout", 10*60, "数据库超时关闭时间(秒)")

	configPath := flag.String("config", "", "配置文件路径，配置此项其他参数忽略")

	flag.Parse()

	if *configPath != "" {
		// A configuration file was given: decode it and ignore the other flags.
		config := &global.Config{}
		file, err := ioutil.ReadFile(*configPath)
		if err != nil {
			panic(err)
		}
		if err = yaml.Unmarshal(file, config); err != nil {
			panic(err)
		}
		return config
	}

	return &global.Config{
		Addr:        *addr,
		Data:        *dataDir,
		Debug:       *debug,
		Dictionary:  *dictionaryPath,
		EnableAdmin: *enableAdmin,
		Gomaxprocs:  *gomaxprocs,
		Shard:       *shard,
		Auth:        *auth,
		EnableGzip:  *enableGzip,
		Timeout:     *timeout,
	}
}
33 changes: 23 additions & 10 deletions docs/config.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,23 +12,26 @@
./gofound -h

-addr string
设置监听地址和端口 (default "127.0.0.1:5678")
设置监听地址和端口 (default "0.0.0.0:5678")
-auth string
开启认证,例如: admin:123456
开启认证,例如: admin:123456
-config string
配置文件路径,配置此项其他参数忽略
-data string
设置数据存储目录 (default "./data")
设置数据存储目录 (default "./data")
-debug
设置是否开启调试模式 (default true)
设置是否开启调试模式 (default true)
-dictionary string
设置词典路径 (default "./data/dictionary.txt")
设置词典路径 (default "./data/dictionary.txt")
-enableAdmin
设置是否开启后台管理 (default true)
设置是否开启后台管理 (default true)
-enableGzip
是否开启gzip压缩 (default true)
是否开启gzip压缩 (default true)
-gomaxprocs int
设置GOMAXPROCS (default 20)
-shard int
文件分片数量 (default 10)
设置GOMAXPROCS (default 20)
-timeout int
数据库超时关闭时间(秒) (default 600)


```

Expand Down Expand Up @@ -111,3 +114,13 @@
```shell
./gofound --shard=10
```

### timeout

单位为秒。默认为600秒。

数据库超时关闭时间。如果设置为 -1，表示永不关闭，适合查询频繁的场景。超时时间设置过长会导致内存占用过多。

```shell
./gofound --timeout=600
```
61 changes: 1 addition & 60 deletions docs/storage.md
Original file line number Diff line number Diff line change
@@ -1,65 +1,6 @@
# 持久化

+ 关键词索引

关键词是存在内存中的二叉查找树,每个关键词都有一个唯一的索引,这个索引可以通过 `key` 来获取。

`gofound`启动了一个协程,每隔10s检测一次数据是否有变动,有变动的情况就存入磁盘中。

这里我们没有使用`leveldb`,是因为`leveldb`是一个key-value的数据库,而我们的数据只有key没有value,用`leveldb`存储势必会造成存储空间的浪费。
而且频繁取和存,会造成较高的时延和IO。

存储格式:

格式较为简单,由于二叉查找树使用的`uint32`类型,一个key占用4个字节,所以在存储的时候是直接以二进制的方式写入到缓存,最后在压缩进行存储。
[点击查看源码](../searcher/dump/dump.go)

```go
package dump

import (
"gofound/searcher/utils"
"gofound/tree"
)

// Serialize flattens the tree rooted at node into a slice of uint32 keys.
// Each node's key is emitted first, followed by the keys of its right subtree
// (Children[1]) and then its left subtree (Children[0]). A nil node yields an
// empty, non-nil slice. Keys are type-asserted to uint32.
func Serialize(node *tree.Node) []uint32 {
	keys := make([]uint32, 0)

	var walk func(n *tree.Node)
	walk = func(n *tree.Node) {
		if n == nil {
			return
		}
		keys = append(keys, n.Key.(uint32))
		walk(n.Children[1]) // right subtree first
		walk(n.Children[0]) // then left subtree
	}
	walk(node)

	return keys
}

// Write serializes the tree rooted at node and hands the resulting key slice
// to utils.Write together with filename.
func Write(node *tree.Node, filename string) {
	keys := Serialize(node)
	utils.Write(&keys, filename)
}

// Read loads the key slice via utils.Read from filename and rebuilds an
// in-memory tree by inserting the keys in their stored order.
func Read(filename string) *tree.Tree {
	keys := make([]uint32, 0)
	utils.Read(&keys, filename)

	// Re-insert every key to reassemble the tree in memory.
	restored := &tree.Tree{Comparator: utils.Uint32Comparator}
	for i := range keys {
		restored.Insert(keys[i])
	}
	return restored
}


```
持久化采用golang版本的leveldb

+ 关键词与ID映射

Expand Down
15 changes: 15 additions & 0 deletions global/config.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
package global

// Config holds the server settings. It is filled either from command-line
// flags or by decoding a YAML configuration file, so every field carries an
// explicit yaml tag; the json tags that already existed are kept unchanged.
type Config struct {
	Addr        string `yaml:"addr"`                           // Listen address.
	Data        string `yaml:"data" json:"data"`               // Data directory.
	Debug       bool   `yaml:"debug"`                          // Debug mode.
	Dictionary  string `yaml:"dictionary" json:"dictionary"`   // Dictionary path.
	EnableAdmin bool   `yaml:"enableAdmin"`                    // Whether the admin UI is enabled.
	Gomaxprocs  int    `yaml:"gomaxprocs" json:"gomaxprocs"`   // GOMAXPROCS value.
	Shard       int    `yaml:"shard"`                          // Number of shards.
	Auth        string `yaml:"auth" json:"auth"`               // Authentication credentials.
	EnableGzip  bool   `yaml:"enableGzip"`                     // Whether gzip compression is enabled.
	Timeout     int64  `yaml:"timeout" json:"timeout"`         // Timeout.
}
10 changes: 10 additions & 0 deletions global/global.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
package global

import (
"gofound/searcher"
)

// CONFIG holds the server settings.
var CONFIG *Config

// Container is the package-level searcher container.
var Container *searcher.Container
Loading

0 comments on commit f50a0ae

Please sign in to comment.