forked from dgraph-io/badger
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathwatermark.go
255 lines (225 loc) · 7.55 KB
/
watermark.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
/*
* Copyright 2016-2018 Dgraph Labs, Inc. and Contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package y
import (
"container/heap"
"context"
"sync/atomic"
"golang.org/x/net/trace"
)
type uint64Heap []uint64
func (u uint64Heap) Len() int { return len(u) }
func (u uint64Heap) Less(i, j int) bool { return u[i] < u[j] }
func (u uint64Heap) Swap(i, j int) { u[i], u[j] = u[j], u[i] }
func (u *uint64Heap) Push(x interface{}) { *u = append(*u, x.(uint64)) }
func (u *uint64Heap) Pop() interface{} {
old := *u
n := len(old)
x := old[n-1]
*u = old[0 : n-1]
return x
}
// mark contains one of more indices, along with a done boolean to indicate the
// status of the index: begin or done. It also contains waiters, who could be
// waiting for the watermark to reach >= a certain index.
type mark struct {
// Either this is an (index, waiter) pair or (index, done) or (indices, done).
index uint64
waiter chan struct{}
indices []uint64
done bool // Set to true if the index is done.
}
// WaterMark is used to keep track of the minimum un-finished index. Typically, an index k becomes
// finished or "done" according to a WaterMark once Done(k) has been called
// 1. as many times as Begin(k) has, AND
// 2. a positive number of times.
//
// An index may also become "done" by calling SetDoneUntil at a time such that it is not
// inter-mingled with Begin/Done calls.
//
// Since doneUntil and lastIndex addresses are passed to sync/atomic packages, we ensure that they
// are 64-bit aligned by putting them at the beginning of the structure.
type WaterMark struct {
doneUntil uint64
lastIndex uint64
Name string
markCh chan mark
elog trace.EventLog
}
// Init initializes a WaterMark struct. MUST be called before using it.
func (w *WaterMark) Init(closer *Closer, eventLogging bool) {
w.markCh = make(chan mark, 100)
if eventLogging {
w.elog = trace.NewEventLog("Watermark", w.Name)
} else {
w.elog = NoEventLog
}
go w.process(closer)
}
// Begin sets the last index to the given value.
func (w *WaterMark) Begin(index uint64) {
atomic.StoreUint64(&w.lastIndex, index)
w.markCh <- mark{index: index, done: false}
}
// BeginMany works like Begin but accepts multiple indices.
func (w *WaterMark) BeginMany(indices []uint64) {
atomic.StoreUint64(&w.lastIndex, indices[len(indices)-1])
w.markCh <- mark{index: 0, indices: indices, done: false}
}
// Done sets a single index as done.
func (w *WaterMark) Done(index uint64) {
w.markCh <- mark{index: index, done: true}
}
// DoneMany works like Done but accepts multiple indices.
func (w *WaterMark) DoneMany(indices []uint64) {
w.markCh <- mark{index: 0, indices: indices, done: true}
}
// DoneUntil returns the maximum index that has the property that all indices
// less than or equal to it are done.
func (w *WaterMark) DoneUntil() uint64 {
return atomic.LoadUint64(&w.doneUntil)
}
// SetDoneUntil sets the maximum index that has the property that all indices
// less than or equal to it are done.
func (w *WaterMark) SetDoneUntil(val uint64) {
atomic.StoreUint64(&w.doneUntil, val)
}
// LastIndex returns the last index for which Begin has been called.
func (w *WaterMark) LastIndex() uint64 {
return atomic.LoadUint64(&w.lastIndex)
}
// WaitForMark waits until the given index is marked as done.
func (w *WaterMark) WaitForMark(ctx context.Context, index uint64) error {
if w.DoneUntil() >= index {
return nil
}
waitCh := make(chan struct{})
w.markCh <- mark{index: index, waiter: waitCh}
select {
case <-ctx.Done():
return ctx.Err()
case <-waitCh:
return nil
}
}
// process is used to process the Mark channel. This is not thread-safe,
// so only run one goroutine for process. One is sufficient, because
// all goroutine ops use purely memory and cpu.
// Each index has to emit atleast one begin watermark in serial order otherwise waiters
// can get blocked idefinitely. Example: We had an watermark at 100 and a waiter at 101,
// if no watermark is emitted at index 101 then waiter would get stuck indefinitely as it
// can't decide whether the task at 101 has decided not to emit watermark or it didn't get
// scheduled yet.
func (w *WaterMark) process(closer *Closer) {
defer closer.Done()
var indices uint64Heap
// pending maps raft proposal index to the number of pending mutations for this proposal.
pending := make(map[uint64]int)
waiters := make(map[uint64][]chan struct{})
heap.Init(&indices)
var loop uint64
processOne := func(index uint64, done bool) {
// If not already done, then set. Otherwise, don't undo a done entry.
prev, present := pending[index]
if !present {
heap.Push(&indices, index)
}
delta := 1
if done {
delta = -1
}
pending[index] = prev + delta
loop++
if len(indices) > 0 && loop%10000 == 0 {
min := indices[0]
w.elog.Printf("WaterMark %s: Done entry %4d. Size: %4d Watermark: %-4d Looking for: "+
"%-4d. Value: %d\n", w.Name, index, len(indices), w.DoneUntil(), min, pending[min])
}
// Update mark by going through all indices in order; and checking if they have
// been done. Stop at the first index, which isn't done.
doneUntil := w.DoneUntil()
if doneUntil > index {
AssertTruef(false, "Name: %s doneUntil: %d. Index: %d", w.Name, doneUntil, index)
}
until := doneUntil
loops := 0
for len(indices) > 0 {
min := indices[0]
if done := pending[min]; done > 0 {
break // len(indices) will be > 0.
}
// Even if done is called multiple times causing it to become
// negative, we should still pop the index.
heap.Pop(&indices)
delete(pending, min)
until = min
loops++
}
if until != doneUntil {
AssertTrue(atomic.CompareAndSwapUint64(&w.doneUntil, doneUntil, until))
w.elog.Printf("%s: Done until %d. Loops: %d\n", w.Name, until, loops)
}
notifyAndRemove := func(idx uint64, toNotify []chan struct{}) {
for _, ch := range toNotify {
close(ch)
}
delete(waiters, idx) // Release the memory back.
}
if until-doneUntil <= uint64(len(waiters)) {
// Issue #908 showed that if doneUntil is close to 2^60, while until is zero, this loop
// can hog up CPU just iterating over integers creating a busy-wait loop. So, only do
// this path if until - doneUntil is less than the number of waiters.
for idx := doneUntil + 1; idx <= until; idx++ {
if toNotify, ok := waiters[idx]; ok {
notifyAndRemove(idx, toNotify)
}
}
} else {
for idx, toNotify := range waiters {
if idx <= until {
notifyAndRemove(idx, toNotify)
}
}
} // end of notifying waiters.
}
for {
select {
case <-closer.HasBeenClosed():
return
case mark := <-w.markCh:
if mark.waiter != nil {
doneUntil := atomic.LoadUint64(&w.doneUntil)
if doneUntil >= mark.index {
close(mark.waiter)
} else {
ws, ok := waiters[mark.index]
if !ok {
waiters[mark.index] = []chan struct{}{mark.waiter}
} else {
waiters[mark.index] = append(ws, mark.waiter)
}
}
} else {
if mark.index > 0 {
processOne(mark.index, mark.done)
}
for _, index := range mark.indices {
processOne(index, mark.done)
}
}
}
}
}