forked from huichen/sego
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgoroutines.go
82 lines (69 loc) · 1.43 KB
/
goroutines.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
// 测试sego并行分词速度
package main
import (
"bufio"
"fmt"
"github.com/huichen/sego"
"log"
"os"
"runtime"
"time"
)
// Package-level state shared by main and the worker goroutines.
var (
	// numThreads is the number of worker goroutines started by main; it is
	// taken from the CPU count reported by the runtime.
	numThreads = runtime.NumCPU()

	// numRuns is how many times main feeds the whole corpus to the workers.
	numRuns = 50

	// task carries the lines to segment from main to the workers. It is
	// buffered with room for 40 lines per worker.
	task = make(chan []byte, numThreads*40)

	// done receives one value from each worker once it has finished; the
	// buffer holds one slot per worker.
	done = make(chan bool, numThreads)

	// segmenter is the sego segmenter used by every worker. It starts as the
	// zero value; main loads the dictionary into it.
	segmenter sego.Segmenter
)
// worker receives lines from the task channel and runs the segmenter on each
// one, discarding the result. When task has been closed and fully drained it
// sends a single value on done and returns.
func worker() {
	for {
		line, more := <-task
		if !more {
			// task is closed and empty: report completion exactly once.
			done <- true
			return
		}
		segmenter.Segment(line)
	}
}
// main measures the throughput of concurrent segmentation. It loads the
// dictionary, reads the test corpus into memory, starts numThreads workers,
// feeds every line to them numRuns times, waits for all workers to finish,
// and logs the elapsed time and the speed in MB/s.
func main() {
	// Set the number of threads to the number of CPUs.
	runtime.GOMAXPROCS(numThreads)

	// Load the dictionary.
	segmenter.LoadDictionary("../data/dictionary.txt")

	// Open the file that will be segmented.
	file, err := os.Open("../testdata/bailuyuan.txt")
	if err != nil {
		log.Fatal(err)
	}
	defer file.Close()

	// Read the corpus line by line, keeping it in memory so that file I/O is
	// excluded from the timed section below.
	scanner := bufio.NewScanner(file)
	size := 0
	var lines [][]byte
	for scanner.Scan() {
		var text string
		// Scanning with %s skips leading whitespace and stops at the next
		// whitespace, so only the first space-delimited token of the line is
		// kept. The error is not checked: a blank line fails to scan and
		// simply leaves text empty.
		// NOTE(review): lines with internal whitespace are truncated here;
		// confirm that is intended for this corpus.
		fmt.Sscanf(scanner.Text(), "%s", &text)
		content := []byte(text)
		size += len(content)
		lines = append(lines, content)
	}
	// Scan returns false on both EOF and failure; without this check a read
	// error or an over-long line would silently shorten the corpus and skew
	// the reported speed.
	if err := scanner.Err(); err != nil {
		log.Fatal(err)
	}

	// Start the worker goroutines.
	for i := 0; i < numThreads; i++ {
		go worker()
	}
	log.Print("开始分词")

	// Record the start time.
	t0 := time.Now()

	// Segment in parallel: push the whole corpus through the workers numRuns
	// times.
	for i := 0; i < numRuns; i++ {
		for _, l := range lines {
			task <- l
		}
	}
	// Closing task lets each worker's receive loop terminate.
	close(task)

	// Make sure segmentation has finished: wait for one signal per worker.
	for i := 0; i < numThreads; i++ {
		<-done
	}

	// Record the elapsed time and compute the segmentation speed.
	elapsed := time.Since(t0)
	log.Printf("分词花费时间 %v", elapsed)
	log.Printf("分词速度 %f MB/s", float64(size*numRuns)/elapsed.Seconds()/(1024*1024))
}