-
Notifications
You must be signed in to change notification settings - Fork 34
/
Copy pathanonymize.go
77 lines (70 loc) · 1.97 KB
/
anonymize.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
package anonymize
import (
"fmt"
"regexp"
"sort"
"strings"
)
// anonym is the placeholder text written in place of every masked value.
const anonym = "xxx"

var (
	// tokenSep matches the characters that separate tokens inside a secret:
	// any whitespace character, a comma, or a semicolon.
	tokenSep = regexp.MustCompile(`\s|[,;]`)

	// userSep matches the characters used to split the part of an address
	// that precedes "@": dot, underscore, or hyphen.
	userSep = regexp.MustCompile("[._-]")

	// adjacentSecrets matches two or more placeholders separated by single
	// whitespace characters, so that such a run can be folded into one.
	adjacentSecrets = regexp.MustCompile(fmt.Sprintf(`%s(\s%s)+`, anonym, anonym))
)
// replaceNames masks whole-word, case-insensitive occurrences of names in src.
//
// src is split on single spaces. A word matches a name when, after
// lower-casing and stripping leading and trailing ":,!?.;" characters, it
// equals the lower-cased name. A matching word is rewritten as its lower-cased
// form with the name replaced by anonym, which keeps the surrounding
// punctuation. Words that match no name are left untouched, and the words are
// re-joined with single spaces.
//
// Empty names are ignored. Otherwise an empty name would match every empty or
// punctuation-only word, and replacing the empty string would insert the
// placeholder between all of that word's characters.
func replaceNames(src string, names []string) string {
	// Lower-case each name once up front rather than once per word.
	lowered := make([]string, 0, len(names))
	for _, name := range names {
		if name == "" {
			continue
		}
		lowered = append(lowered, strings.ToLower(name))
	}
	if len(lowered) == 0 {
		return src
	}

	words := strings.Split(src, " ")
	for i, word := range words {
		lowerWord := strings.ToLower(word)
		bare := strings.Trim(lowerWord, ":,!?.;")
		for _, name := range lowered {
			if bare == name {
				words[i] = strings.ReplaceAll(lowerWord, name, anonym)
				break
			}
		}
	}
	return strings.Join(words, " ")
}
// Anonymize replaces secret information in src with the anonym placeholder.
//
// Words matching names are masked first via replaceNames. The secrets are then
// broken into tokens by tokenize; when there are none, the name-masked text is
// returned as is. Otherwise the tokens are compiled into a single pattern by
// or, every match is replaced with the placeholder, and runs of adjacent
// placeholders are folded into one.
//
// If the pattern cannot be compiled, the name-masked text is returned together
// with the compile error.
func Anonymize(src string, names []string, secrets ...string) (string, error) {
	masked := replaceNames(src, names)

	patterns := tokenize(secrets...)
	if len(patterns) == 0 {
		return masked, nil
	}

	matcher, err := or(patterns)
	if err != nil {
		return masked, err
	}

	// Mask each token, then fold whitespace-separated placeholder runs.
	masked = adjacentSecrets.ReplaceAllString(matcher.ReplaceAllString(masked, anonym), anonym)
	return masked, nil
}
// tokenize breaks the given secret strings into a de-duplicated list of
// regexp-escaped tokens, longest first.
//
// Each input is lower-cased and split on tokenSep; every piece is then
// stripped of surrounding angle brackets, quotes, spaces, tabs and newlines.
//
//   - A piece containing "@" is split at the first "@". The part after it is
//     kept whole when non-empty; the part before it is split on userSep into
//     at most five pieces, and only pieces longer than two bytes are kept.
//   - Any other piece is kept when it is longer than one byte.
//
// The result is sorted by descending length of the escaped token, with ties
// broken lexically so the order is deterministic. It is nil when no token
// qualifies.
func tokenize(text ...string) (tokens []string) {
	tokenSet := map[string]struct{}{}
	for _, s := range text {
		for _, token := range tokenSep.Split(strings.ToLower(s), -1) {
			token = strings.Trim(token, "<>\" \n\t'")
			if !strings.Contains(token, "@") {
				if len(token) > 1 {
					tokenSet[token] = struct{}{}
				}
				continue
			}

			parts := strings.SplitN(token, "@", 2)
			// A piece such as "user@" has no domain. An empty token must not
			// reach the alternation built from this list, because an empty
			// alternative matches at every position of the text.
			if parts[1] != "" {
				tokenSet[parts[1]] = struct{}{}
			}
			for _, userPart := range userSep.Split(parts[0], 5) {
				if len(userPart) > 2 {
					tokenSet[userPart] = struct{}{}
				}
			}
		}
	}
	if len(tokenSet) == 0 {
		return nil
	}

	tokens = make([]string, 0, len(tokenSet))
	for token := range tokenSet {
		tokens = append(tokens, regexp.QuoteMeta(token))
	}
	// Map iteration order is random, so length alone would leave equal-length
	// tokens in an arbitrary order; the lexical tie-break makes it stable.
	sort.Slice(tokens, func(i, j int) bool {
		if len(tokens[i]) != len(tokens[j]) {
			return len(tokens[i]) > len(tokens[j])
		}
		return tokens[i] < tokens[j]
	})
	return tokens
}
// or compiles tokens into one case-insensitive regular expression that matches
// any of them. The tokens are joined with "|" exactly as given, so callers are
// expected to pass already-escaped patterns. The compile error, if any, is
// returned unchanged.
func or(tokens []string) (*regexp.Regexp, error) {
	alternation := strings.Join(tokens, "|")
	pattern := fmt.Sprintf("(?i)%s", alternation)
	return regexp.Compile(pattern)
}