forked from cyfdecyf/cow
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdomainset.go
326 lines (285 loc) · 6.69 KB
/
domainset.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
package main
import (
"bufio"
"io"
"os"
"path"
"sort"
"strings"
"sync"
)
// domainSet is a set of domain names stored as map keys; members are
// recorded with the value true. It carries no locking of its own.
type domainSet map[string]bool
// paraDomainSet is basically a concurrent map: a domainSet paired with an
// embedded sync.RWMutex. Channels are deliberately not used to implement
// concurrent access here, locks are simple enough for a task like this.
//
// Only the add, has and del methods take the lock. Methods promoted from the
// embedded domainSet, and code that touches the embedded map directly, do not.
type paraDomainSet struct {
	sync.RWMutex
	domainSet
}
// newDomainSet returns an empty, non-nil domainSet that is ready for
// insertion.
func newDomainSet() domainSet {
	return domainSet{}
}
// loadDomainList reads the domain list stored at fpath and inserts every
// entry into ds. The loaded entries are returned as lst. When reading fails,
// ds is left untouched and the error is passed on together with whatever
// the package-level loadDomainList returned.
func (ds domainSet) loadDomainList(fpath string) (lst []string, err error) {
	if lst, err = loadDomainList(fpath); err != nil {
		return lst, err
	}
	// No locking: loading happens in a single goroutine.
	for i := range lst {
		ds[lst[i]] = true
	}
	return lst, nil
}
// toArray returns the keys of ds as a newly allocated slice. The order of
// the result follows map iteration order and is therefore unspecified.
func (ds domainSet) toArray() []string {
	names := make([]string, 0, len(ds))
	for name := range ds {
		names = append(names, name)
	}
	return names
}
func newParaDomainSet() *paraDomainSet {
return ¶DomainSet{domainSet: newDomainSet()}
}
// add inserts dm into the set while holding the write lock.
func (ds *paraDomainSet) add(dm string) {
	ds.Lock()
	defer ds.Unlock()
	ds.domainSet[dm] = true
}
// has reports whether dm is present as a key in the set. The lookup is done
// under the read lock.
func (ds *paraDomainSet) has(dm string) bool {
	ds.RLock()
	defer ds.RUnlock()
	_, found := ds.domainSet[dm]
	return found
}
// del removes dm from the set while holding the write lock. Removing a
// domain that is not present is a no-op.
func (ds *paraDomainSet) del(dm string) {
	ds.Lock()
	defer ds.Unlock()
	delete(ds.domainSet, dm)
}
// Domain sets that are updated while requests are handled (see
// addBlockedRequest and addDirectRequest).
var (
	blockedDs = newParaDomainSet()
	directDs  = newParaDomainSet()
)

// Set whenever the matching set above is modified; writeBlockedDs and
// writeDirectDs skip writing when the flag is false.
var (
	blockedDomainChanged = false
	directDomainChanged  = false
)

// Plain domain sets without locking, filled by loadDomainSet.
var (
	alwaysBlockedDs = newDomainSet()
	alwaysDirectDs  = newDomainSet()
	chouDs          = newDomainSet()
)
// requestDomain returns the domain that the host of r's URL belongs to, as
// computed by host2Domain. The second result of splitHostPort (presumably
// the port) is discarded.
func requestDomain(r *Request) string {
	hostname, _ := splitHostPort(r.URL.Host)
	return host2Domain(hostname)
}
// inAlwaysDs reports whether dm is listed in the always-blocked or the
// always-direct domain set.
func inAlwaysDs(dm string) bool {
	if alwaysBlockedDs[dm] {
		return true
	}
	return alwaysDirectDs[dm]
}
// hostInAlwaysDirectDs reports whether the domain of host is in the
// always-direct set. host is first passed through splitHostPort and
// host2Domain.
func hostInAlwaysDirectDs(host string) bool {
	name, _ := splitHostPort(host)
	dm := host2Domain(name)
	return alwaysDirectDs[dm]
}
// hostInAlwaysBlockedDs reports whether the domain of host is in the
// always-blocked set. host is first passed through splitHostPort and
// host2Domain.
func hostInAlwaysBlockedDs(host string) bool {
	name, _ := splitHostPort(host)
	dm := host2Domain(name)
	return alwaysBlockedDs[dm]
}
// isRequestBlocked reports whether the domain requested by r is considered
// blocked. The always-direct set is consulted first, then the always-blocked
// set, and only then the blocked set maintained at run time.
func isRequestBlocked(r *Request) bool {
	switch dm := requestDomain(r); {
	case alwaysDirectDs[dm]:
		return false
	case alwaysBlockedDs[dm]:
		return true
	default:
		return blockedDs.has(dm)
	}
}
// isRequestInChouDs reports whether the domain requested by r is in the
// chou domain set.
func isRequestInChouDs(r *Request) bool {
	return chouDs[requestDomain(r)]
}
// addBlockedRequest records the domain requested by r as blocked and makes
// sure it is no longer listed as direct. It returns true only when the
// domain was newly added to the blocked set. Requests whose host is an IP
// address, or whose domain is in one of the always lists, are ignored and
// yield false.
func addBlockedRequest(r *Request) bool {
	host, _ := splitHostPort(r.URL.Host)
	if hostIsIP(host) {
		return false
	}
	dm := host2Domain(host)
	// For chou domain, we should add it to the blocked list in order to use
	// parent proxy, but don't write it back to auto-block file.
	if inAlwaysDs(dm) {
		return false
	}
	// A blocked domain must not remain in the direct set. Do this before
	// the early return below so a newly blocked domain is removed right
	// away, the same way addDirectRequest always clears the blocked set.
	delDirectRequest(r)
	if blockedDs.has(dm) {
		return false
	}
	blockedDs.add(dm)
	blockedDomainChanged = true
	debug.Printf("%v added to blocked list\n", dm)
	return true
}
// delBlockedRequest removes the domain requested by r from the blocked set.
// The change flag is raised and a debug message printed only when the domain
// was actually present.
func delBlockedRequest(r *Request) {
	dm := requestDomain(r)
	if !blockedDs.has(dm) {
		return
	}
	blockedDs.del(dm)
	blockedDomainChanged = true
	debug.Printf("%v deleted from blocked list\n", dm)
}
// addDirectRequest records the domain requested by r as directly reachable
// and removes it from the blocked set. Nothing happens for IP hosts, for
// domains in one of the always lists, or for chou domains.
func addDirectRequest(r *Request) {
	host, _ := splitHostPort(r.URL.Host)
	if hostIsIP(host) {
		return
	}
	dm := host2Domain(host)
	if inAlwaysDs(dm) {
		return
	}
	if chouDs[dm] {
		return
	}
	if known := directDs.has(dm); !known {
		directDs.add(dm)
		directDomainChanged = true
	}
	// A direct domain must not stay in the blocked domain set.
	delBlockedRequest(r)
}
// delDirectRequest removes the domain requested by r from the direct set
// and raises the change flag when the domain was actually present.
func delDirectRequest(r *Request) {
	dm := requestDomain(r)
	if !directDs.has(dm) {
		return
	}
	directDs.del(dm)
	directDomainChanged = true
}
// writeBlockedDs writes the blocked set to config.blockedFile, but only when
// updating is enabled in the config and the set has changed.
func writeBlockedDs() {
	if config.updateBlocked && blockedDomainChanged {
		writeDomainList(config.blockedFile, blockedDs.toArray())
	}
}
// writeDirectDs writes the direct set to config.directFile, but only when
// updating is enabled in the config and the set has changed.
func writeDirectDs() {
	if config.updateDirect && directDomainChanged {
		writeDomainList(config.directFile, directDs.toArray())
	}
}
// filterOutDs removes every domain contained in ds from both the blocked and
// the direct domain set, raising the matching change flag for each removal.
// The underlying maps are accessed directly, without taking the locks.
func filterOutDs(ds domainSet) {
	blocked, direct := blockedDs.domainSet, directDs.domainSet
	for dm := range ds {
		if blocked[dm] {
			delete(blocked, dm)
			blockedDomainChanged = true
		}
		if direct[dm] {
			delete(direct, dm)
			directDomainChanged = true
		}
	}
}
// If a domain name appears in both blocked and direct domain set, only keep
// it in the blocked set.
func filterOutBlockedDsInDirectDs() {
for k, _ := range blockedDs.domainSet {
if directDs.domainSet[k] {
delete(directDs.domainSet, k)
directDomainChanged = true
}
}
for k, _ := range alwaysBlockedDs {
if alwaysDirectDs[k] {
errl.Printf("%s in both always blocked and direct domain lists, taken as blocked.\n", k)
delete(alwaysDirectDs, k)
}
}
}
// writeDomainSet writes the blocked and direct domain sets back to disk
// through writeBlockedDs and writeDirectDs, after removing chou domains
// from both sets.
func writeDomainSet() {
	// Chou domains may have been added to the blocked set during
	// execution; filter them out before writing back to disk.
	filterOutDs(chouDs)
	writeBlockedDs()
	writeDirectDs()
}
// loadDomainList reads the file at fpath line by line and returns the
// domains it contains. Surrounding white space is stripped from every line,
// and lines that are empty after stripping are skipped.
//
// If openFile yields no file or an error, lst is nil and err is whatever
// openFile returned. A read error other than io.EOF is logged and returned
// together with the entries collected so far.
func loadDomainList(fpath string) (lst []string, err error) {
	f, err := openFile(fpath)
	if f == nil || err != nil {
		return nil, err
	}
	defer f.Close()

	fr := bufio.NewReader(f)
	lst = make([]string, 0)
	for {
		var line string
		line, err = ReadLine(fr)
		if err == io.EOF {
			return lst, nil
		}
		if err != nil {
			errl.Println("Error reading domain list from:", fpath, err)
			return lst, err
		}
		// Trim before testing for emptiness, otherwise a line made up of
		// white space only would be recorded as an empty domain.
		if domain := strings.TrimSpace(line); domain != "" {
			lst = append(lst, domain)
		}
	}
}
// writeDomainList writes lst, sorted and joined with newlines, to fpath.
// The data first goes to a temporary file named "tmp-domain" inside
// config.dir, which is then renamed to fpath. Note that lst is sorted in
// place.
//
// The first failure (create, write, close or rename) is logged through errl
// and returned. When writing or closing the temporary file fails, fpath is
// left untouched.
func writeDomainList(fpath string, lst []string) (err error) {
	tmpPath := path.Join(config.dir, "tmp-domain")
	f, err := os.Create(tmpPath)
	if err != nil {
		errl.Println("Error creating tmp domain list file:", err)
		return
	}

	sort.Strings(lst)
	_, err = f.WriteString(strings.Join(lst, "\n"))
	// Always close the file, but keep the write error if there was one.
	if cerr := f.Close(); err == nil {
		err = cerr
	}
	if err != nil {
		errl.Println("Error writing tmp domain list file:", err)
		// Best effort clean up; never replace fpath with incomplete data.
		_ = os.Remove(tmpPath)
		return
	}

	if err = os.Rename(tmpPath, fpath); err != nil {
		errl.Printf("Error moving tmp domain list file to %s: %v\n", fpath, err)
	}
	return
}
// topLevelDomain lists the labels which, when found as the second to last
// part of a host name, are treated as belonging to the suffix (the "co" in
// bbc.co.uk), so that one more label is kept by host2Domain.
var topLevelDomain = map[string]bool{
	"co":  true,
	"org": true,
	"com": true,
	"net": true,
	"edu": true,
}

// host2Domain strips leading sub-domain labels from host and returns the
// remaining domain: normally the last two labels, or the last three when the
// second to last label is in topLevelDomain. Hosts that have no more labels
// than that (including simple host names without any dot, which we should
// not handle) are returned unchanged.
func host2Domain(host string) (domain string) {
	lastDot := strings.LastIndex(host, ".")
	if lastDot < 0 {
		return host
	}
	prevDot := strings.LastIndex(host[:lastDot], ".")
	if prevDot < 0 {
		return host
	}

	// cut is the index of the dot right before the domain to return.
	cut := prevDot
	if topLevelDomain[host[prevDot+1:lastDot]] {
		// Keep one more label, so bbc.co.uk is not recorded as co.uk.
		cut = strings.LastIndex(host[:prevDot], ".")
		if cut < 0 {
			return host
		}
	}
	return host[cut+1:]
}
func loadDomainSet() {
blockedDs.loadDomainList(config.blockedFile)
directDs.loadDomainList(config.directFile)
alwaysBlockedDs.loadDomainList(config.alwaysBlockedFile)
alwaysDirectDs.loadDomainList(config.alwaysDirectFile)
chouDs.loadDomainList(config.chouFile)
filterOutDs(chouDs)
filterOutDs(alwaysDirectDs)
filterOutDs(alwaysBlockedDs)
filterOutBlockedDsInDirectDs()
}