forked from scylladb/seastar
-
Notifications
You must be signed in to change notification settings - Fork 0
/
chunked_fifo.hh
461 lines (434 loc) · 16.6 KB
/
chunked_fifo.hh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
/*
* This file is open source software, licensed to you under the terms
* of the Apache License, Version 2.0 (the "License"). See the NOTICE file
* distributed with this work for additional information regarding copyright
* ownership. You may not use this file except in compliance with the License.
*
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*
* Copyright (C) 2016 ScyllaDB Ltd.
*/
#pragma once
// An unbounded FIFO queue of objects of type T.
//
// It provides operations to push items in one end of the queue, and pop them
// from the other end of the queue - both operations are guaranteed O(1)
// (not just amortized O(1)). The size() operation is also O(1).
// chunked_fifo also guarantees that the largest contiguous memory allocation
// it does is O(1). The total memory used is, of course, O(N).
//
// How does chunked_fifo differ from std::list<>, circular_buffer<> and
// std::deque()?
//
// std::list<> can also make all the above guarantees, but is inefficient -
// both at run speed (every operation requires an allocation), and in memory
// use. Much more efficient than std::list<> is our circular_buffer<>, which
// allocates a contiguous array to hold the items and only reallocates it,
// exponentially, when the queue grows. On one test of several different
// push/pop scenarios, circular_buffer<> was between 5 and 20 times faster
// than std::list, and also used considerably less memory.
// The problem with circular_buffer<> is that gives up on the last guarantee
// we made above: circular_buffer<> allocates all the items in one large
// contiguous allocation - that might not be possible when the memory is
// highly fragmented.
// std::deque<> aims to solve the contiguous allocation problem by allocating
// smaller chunks of the queue, and keeping a list of them in an array. This
// array is necessary to allow for O(1) random access to any element, a
// feature which we do not need; But this array is itself contiguous so
// std::deque<> attempts larger contiguous allocations the larger the queue
// gets: std::deque<>'s contiguous allocation is still O(N) and in fact
// exactly 1/64 of the size of circular_buffer<>'s contiguous allocation.
// So it's an improvement over circular_buffer<>, but not a full solution.
//
// chunked_fifo<> is such a solution: it also allocates the queue in fixed-
// size chunks (just like std::deque) but holds them in a linked list, not
// a contiguous array, so there are no large contiguous allocations.
//
// Unlike std::deque<> or circular_buffer<>, chunked_fifo only provides the
// operations needed by std::queue, i.e.,: empty(), size(), front(), back(),
// push_back() and pop_front(). For simplicity, we do *not* implement other
// possible operations, like inserting or deleting elements from the "wrong"
// side of the queue or from the middle, nor random-access to items in the
// middle of the queue or iteration over the items without popping them.
//
// Another feature of chunked_fifo which std::deque is missing is the ability
// to control the chunk size, as a template parameter. In std::deque the
// chunk size is undocumented and fixed - in gcc, it is always 512 bytes.
// chunked_fifo, on the other hand, makes the chunk size (in number of items
// instead of bytes) a template parameter; In situations where the queue is
// expected to become very long, using a larger chunk size might make sense
// because it will result in fewer allocations.
//
// chunked_fifo uses uninitialized storage for unoccupied elements, and thus
// uses move/copy constructors instead of move/copy assignments, which are
// less efficient.
#include <memory>
#include <algorithm>
template <typename T, size_t items_per_chunk = 128>
class chunked_fifo {
static_assert((items_per_chunk & (items_per_chunk - 1)) == 0,
"chunked_fifo chunk size must be power of two");
// Note: a "chunk" struct should be allocated but not constructed, so that
// the individual items are not constructed until first pushed.
struct chunk {
T items[items_per_chunk];
struct chunk* next;
// begin and end interpreted mod items_per_chunk
unsigned begin;
unsigned end;
};
// We pop from the chunk at _front_chunk. This chunk is then linked to
// the following chunks via the "next" link. _back_chunk points to the
// last chunk in this list, and it is where we push.
chunk* _front_chunk = nullptr; // where we pop
chunk* _back_chunk = nullptr; // where we push
// We want an O(1) size but don't want to maintain a size() counter
// because this will slow down every push and pop operation just for
// the rare size() call. Instead, we just keep a count of chunks (which
// doesn't change on every push or pop), from which we can calculate
// size() when needed, and still be O(1).
// This assumes the invariant that all middle chunks (except the front
// and back) are always full.
size_t _nchunks = 0;
// A list of freed chunks, to support reserve() and to improve
// performance of repeated push and pop, especially on an empty queue.
// It is a performance/memory tradeoff how many freed chunks to keep
// here (see save_free_chunks constant below).
chunk* _free_chunks = nullptr;
size_t _nfree_chunks = 0;
public:
using value_type = T;
using size_type = size_t;
using reference = T&;
using pointer = T*;
using const_reference = const T&;
using const_pointer = const T*;
public:
chunked_fifo() = default;
chunked_fifo(chunked_fifo&& x) noexcept;
chunked_fifo(const chunked_fifo& X) = delete;
~chunked_fifo();
chunked_fifo& operator=(const chunked_fifo&) = delete;
chunked_fifo& operator=(chunked_fifo&&) noexcept;
inline void push_back(const T& data);
inline void push_back(T&& data);
T& back();
const T& back() const;
template <typename... A>
inline void emplace_back(A&&... args);
inline T& front() const noexcept;
inline void pop_front() noexcept;
inline bool empty() const noexcept;
inline size_t size() const noexcept;
void clear() noexcept;
// reserve(n) ensures that at least (n - size()) further push() calls can
// be served without needing new memory allocation.
// Calling pop()s between these push()es is also allowed and does not
// alter this guarantee.
// Note that reserve() does not reduce the amount of memory already
// reserved - use shrink_to_fit() for that.
void reserve(size_t n);
// shrink_to_fit() frees memory held, but unused, by the queue. Such
// unused memory might exist after pops, or because of reserve().
void shrink_to_fit();
private:
void back_chunk_new();
void front_chunk_delete() noexcept;
inline void ensure_room_back();
void undo_room_back();
inline size_t mask(size_t idx) const noexcept;
};
template <typename T, size_t items_per_chunk>
inline
chunked_fifo<T, items_per_chunk>::chunked_fifo(chunked_fifo&& x) noexcept
: _front_chunk(x._front_chunk)
, _back_chunk(x._back_chunk)
, _nchunks(x._nchunks)
, _free_chunks(x._free_chunks)
, _nfree_chunks(x._nfree_chunks) {
x._front_chunk = nullptr;
x._back_chunk = nullptr;
x._nchunks = 0;
x._free_chunks = nullptr;
x._nfree_chunks = 0;
}
template <typename T, size_t items_per_chunk>
inline
chunked_fifo<T, items_per_chunk>&
chunked_fifo<T, items_per_chunk>::operator=(chunked_fifo&& x) noexcept {
if (&x != this) {
this->~chunked_fifo();
new (this) chunked_fifo(std::move(x));
}
return *this;
}
template <typename T, size_t items_per_chunk>
inline size_t
chunked_fifo<T, items_per_chunk>::mask(size_t idx) const noexcept {
return idx & (items_per_chunk - 1);
}
template <typename T, size_t items_per_chunk>
inline bool
chunked_fifo<T, items_per_chunk>::empty() const noexcept {
return _front_chunk == nullptr;
}
template <typename T, size_t items_per_chunk>
inline size_t
chunked_fifo<T, items_per_chunk>::size() const noexcept{
if (_front_chunk == nullptr) {
return 0;
} else if (_back_chunk == _front_chunk) {
// Single chunk.
return _front_chunk->end - _front_chunk->begin;
} else {
return _front_chunk->end - _front_chunk->begin
+_back_chunk->end - _back_chunk->begin
+ (_nchunks - 2) * items_per_chunk;
}
}
template <typename T, size_t items_per_chunk>
void chunked_fifo<T, items_per_chunk>::clear() noexcept {
#if 1
while (!empty()) {
pop_front();
}
#else
// This is specialized code to free the contents of all the chunks and the
// chunks themselves. but since destroying a very full queue is not an
// important use case to optimize, the simple loop above is preferable.
if (!_front_chunk) {
// Empty, nothing to do
return;
}
// Delete front chunk (partially filled)
for (auto i = _front_chunk->begin; i != _front_chunk->end; ++i) {
_front_chunk->items[mask(i)].~T();
}
chunk *p = _front_chunk->next;
operator delete(_front_chunk, sizeof(chunk));
// Delete all the middle chunks (all completely filled)
if (p) {
while (p != _back_chunk) {
// These are full chunks
chunk *nextp = p->next;
for (auto i = 0; i != items_per_chunk; ++i) {
// Note we delete out of order (we don't start with p->begin).
// That should be fine..
p->items[i].~T();
}
operator delete(p, sizeof(chunk));
p = nextp;
}
// Finally delete back chunk (partially filled)
for (auto i = _back_chunk->begin; i != _back_chunk->end; ++i) {
_back_chunk->items[mask(i)].~T();
}
operator delete(_back_chunk, sizeof(chunk));
}
_front_chunk = nullptr;
_back_chunk = nullptr;
_nchunks = 0;
#endif
}
template <typename T, size_t items_per_chunk> void
chunked_fifo<T, items_per_chunk>::shrink_to_fit() {
while (_free_chunks) {
auto next = _free_chunks->next;
operator delete(_free_chunks, sizeof(chunk));
_free_chunks = next;
}
_nfree_chunks = 0;
}
template <typename T, size_t items_per_chunk>
chunked_fifo<T, items_per_chunk>::~chunked_fifo() {
clear();
shrink_to_fit();
}
template <typename T, size_t items_per_chunk>
void
chunked_fifo<T, items_per_chunk>::back_chunk_new() {
chunk *old = _back_chunk;
if (_free_chunks) {
_back_chunk = _free_chunks;
_free_chunks = _free_chunks->next;
--_nfree_chunks;
} else {
_back_chunk = static_cast<chunk*>(operator new(sizeof(chunk)));
}
_back_chunk->next = nullptr;
_back_chunk->begin = 0;
_back_chunk->end = 0;
if (old) {
old->next = _back_chunk;
}
if (_front_chunk == nullptr) {
_front_chunk = _back_chunk;
}
_nchunks++;
}
template <typename T, size_t items_per_chunk>
inline void
chunked_fifo<T, items_per_chunk>::ensure_room_back() {
// If we don't have a back chunk or it's full, we need to create a new one
if (_back_chunk == nullptr ||
(_back_chunk->end - _back_chunk->begin) == items_per_chunk) {
back_chunk_new();
}
}
template <typename T, size_t items_per_chunk>
void
chunked_fifo<T, items_per_chunk>::undo_room_back() {
// If we failed creating a new item after ensure_room_back() created a
// new empty chunk, we must remove it, or empty() will be incorrect
// (either immediately, if the fifo was empty, or when all the items are
// popped, if it already had items).
if (_back_chunk->begin == _back_chunk->end) {
operator delete(_back_chunk, sizeof(chunk));
--_nchunks;
if (_nchunks == 0) {
_back_chunk = nullptr;
_front_chunk = nullptr;
} else {
// Because we don't usually pop from the back, we don't have a "prev"
// pointer so we need to find the previous chunk the hard and slow
// way. B
chunk *old = _back_chunk;
_back_chunk = _front_chunk;
while (_back_chunk->next != old) {
_back_chunk = _back_chunk->next;
}
_back_chunk->next = nullptr;
}
}
}
template <typename T, size_t items_per_chunk>
template <typename... Args>
inline void
chunked_fifo<T, items_per_chunk>::emplace_back(Args&&... args) {
ensure_room_back();
auto p = &_back_chunk->items[mask(_back_chunk->end)];
try {
new(p) T(std::forward<Args>(args)...);
} catch(...) {
undo_room_back();
throw;
}
++_back_chunk->end;
}
template <typename T, size_t items_per_chunk>
inline void
chunked_fifo<T, items_per_chunk>::push_back(const T& data) {
ensure_room_back();
auto p = &_back_chunk->items[mask(_back_chunk->end)];
try {
new(p) T(data);
} catch(...) {
undo_room_back();
throw;
}
++_back_chunk->end;
}
template <typename T, size_t items_per_chunk>
inline void
chunked_fifo<T, items_per_chunk>::push_back(T&& data) {
ensure_room_back();
auto p = &_back_chunk->items[mask(_back_chunk->end)];
try {
new(p) T(std::move(data));
} catch(...) {
undo_room_back();
throw;
}
++_back_chunk->end;
}
template <typename T, size_t items_per_chunk>
inline
T&
chunked_fifo<T, items_per_chunk>::back() {
return _back_chunk->items[mask(_back_chunk->end - 1)];
}
template <typename T, size_t items_per_chunk>
inline
const T&
chunked_fifo<T, items_per_chunk>::back() const {
return _back_chunk->items[mask(_back_chunk->end - 1)];
}
template <typename T, size_t items_per_chunk>
inline T&
chunked_fifo<T, items_per_chunk>::front() const noexcept {
return _front_chunk->items[mask(_front_chunk->begin)];
}
template <typename T, size_t items_per_chunk>
inline void
chunked_fifo<T, items_per_chunk>::front_chunk_delete() noexcept {
chunk *next = _front_chunk->next;
// Certain use cases may need to repeatedly allocate and free a chunk -
// an obvious example is an empty queue to which we push, and then pop,
// repeatedly. Another example is pushing and popping to a non-empty queue
// we push and pop at different chunks so we need to free and allocate a
// chunk every items_per_chunk operations.
// The solution is to keep a list of freed chunks instead of freeing them
// immediately. There is a performance/memory tradeoff of how many freed
// chunks to save: If we save them all, the queue can never shrink from
// its maximum memory use (this is how circular_buffer behaves).
// The ad-hoc choice made here is to limit the number of saved chunks to 1,
// but this could easily be made a configuration option.
static constexpr int save_free_chunks = 1;
if (_nfree_chunks < save_free_chunks) {
_front_chunk->next = _free_chunks;
_free_chunks = _front_chunk;
++_nfree_chunks;
} else {
operator delete(_front_chunk, sizeof(chunk));
}
// If we only had one chunk, _back_chunk is gone too.
if (_back_chunk == _front_chunk) {
_back_chunk = nullptr;
}
_front_chunk = next;
--_nchunks;
}
template <typename T, size_t items_per_chunk>
inline void
chunked_fifo<T, items_per_chunk>::pop_front() noexcept {
front().~T();
// If the front chunk has become empty, we need to free remove it and use
// the next one.
if (++_front_chunk->begin == _front_chunk->end) {
front_chunk_delete();
}
}
template <typename T, size_t items_per_chunk>
void chunked_fifo<T, items_per_chunk>::reserve(size_t n) {
// reserve() guarantees that (n - size()) additional push()es will
// succeed without reallocation:
size_t need = n - size();
// If we already have a back chunk, it might have room for some pushes
// before filling up, so decrease "need":
if (_back_chunk) {
need -= items_per_chunk - (_back_chunk->end - _back_chunk->begin);
}
size_t needed_chunks = (need + items_per_chunk - 1) / items_per_chunk;
// If we already have some freed chunks saved, we need to allocate fewer
// additional chunks, or none at all
if (needed_chunks <= _nfree_chunks) {
return;
}
needed_chunks -= _nfree_chunks;
while (needed_chunks--) {
chunk *c = static_cast<chunk*>(operator new(sizeof(chunk)));
c->next = _free_chunks;
_free_chunks = c;
++_nfree_chunks;
}
}