forked from apple/foundationdb
-
Notifications
You must be signed in to change notification settings - Fork 0
/
CompressedInt.h
134 lines (124 loc) · 5.04 KB
/
CompressedInt.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
/*
* CompressedInt.h
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2018 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <stdint.h>
// A signed compressed integer format that retains ordering in compressed form.
// Format is: [~sign_bit] [unary_len] [value_bits]
// If the sign bit is 0 then all other bits are inverted to maintain sort order
// in encoded form.
//
// Examples: 0/1/n = bit N=byte bits (w/o sign) Total encoded value length
// 10nn nnnn 6 bits 1 byte
// 110n nnnn N{1} 13 bits 2 bytes
// 1110 nnnn N{2} 20 bits 3 bytes
// 1111 0nnn N{3} 27 bits 4 bytes
// 1111 10nn N{4} 34 bits 5 bytes
// 1111 110n N{5} 41 bits 6 bytes
// 1111 1110 N{6} 48 bits 7 bytes
// 1111 1111 0nnn nnnn N{6} 55 bits 8 bytes
// 1111 1111 10nn nnnn N{7} 62 bits 9 bytes
// 1111 1111 110n nnnn N{8} 69 bits 10 bytes
// 1111 1111 1110 nnnn N{9} 76 bits 11 bytes
// 1111 1111 1111 0nnn N{10} 83 bits 12 bytes
// 1111 1111 1111 10nn N{11} 90 bits 13 bytes
// 1111 1111 1111 110n N{12} 97 bits 14 bytes
// 1111 1111 1111 1110 N{13} 104 bits 15 bytes
// 1111 1111 1111 1111 0nnn nnnn N{13} 111 bits 16 bytes
// 1111 1111 1111 1111 10nn nnnn N{14} 118 bits 17 bytes
// 1111 1111 1111 1111 110n nnnn N{15} 125 bits 18 bytes
template <typename IntType>
struct CompressedInt {
CompressedInt(IntType i = 0) : value(i) {}
IntType value;
template <class Ar>
void serialize(Ar& ar) {
if (ar.isDeserializing) {
uint8_t b;
serializer(ar, b);
int bytesToRead = 0; // Additional bytes to read after the required first byte
bool positive = (b & 0x80) != 0; // Sign bit
if (!positive)
b = ~b; // Negative, so invert bytes read
b &= 0x7f; // Clear sign bit
uint8_t hb = 0x40; // Next header bit to test
// Scan the unary len bits across multiple bytes if needed
while (1) {
if (hb == 0) { // Go to next byte if needed
serializer(ar, b); // Read byte
if (!positive)
b = ~b; // Negative, so invert bytes read
hb = 0x80; // Reset header test bit position
--bytesToRead; // Decrement bytes to read since a byte was just read
}
if ((b & hb) == 0) // If a 0 is found, found the end of the unary sequence
break;
++bytesToRead; // Found a 1 so increment bytes to read
b &= ~hb; // Clear the bit just tested.
hb >>= 1; // Shift header test bit to next lowest position
}
value = b; // b contains the highest byte of value
while (bytesToRead-- != 0) {
serializer(ar, b); // Read byte
if (!positive)
b = ~b; // Negative, so invert bytes read
value <<= 8; // Shift value up to make room for new byte
value |= b; // OR the byte into place
}
if (!positive) // If negative, reverse all bits
value = ~value;
} else {
uint8_t buf[sizeof(IntType) * 2];
int iv = sizeof(buf); // Index of last written value byte
bool neg = value < 0; // If value is negative, flip its bits
IntType v = neg ? ~value : value;
// Write the value bytes from LSB to the rightmost zero byte to the output buffer
while (v) {
buf[--iv] = (uint8_t)v;
v >>= 8;
};
int bitlen = (sizeof(buf) - iv) * 8; // Value bits written so far
if (bitlen != 0) { // Reduce bit length by leading 0s in highest value byte
uint8_t b = buf[iv]; // Get highest value byte
while (!(b & 0x80)) { // While its highest bit is not a 1
--bitlen; // Decrement bit length
b <<= 1; // Shift left to test next lowest position
}
}
int encodedLen = bitlen / 7 + 1; // Calculate length of total encoded value
int iStart = sizeof(buf) - encodedLen; // Starting index of encoded output byte
for (int ih = iStart; ih < iv; ++ih) // Clear any bytes not initialized with a value bit
buf[ih] = 0;
int ih = iStart;
uint8_t b = 0x80; // First unary len bit to set
for (int hb = encodedLen; hb > 0; --hb) { // Set the sign bit and all but the last unary len bit to 1
if (b == 0) { // Start writing a new byte if needed
++ih;
b = 0x80;
}
buf[ih] |= b;
b >>= 1;
}
if (neg) // If negative, bit flip the entire encoded thing
for (int i = iStart; i < sizeof(buf); ++i)
buf[i] = ~buf[i];
ar.serializeBytes(buf + iStart, encodedLen);
}
}
};