-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathHash.h
285 lines (247 loc) · 8.92 KB
/
Hash.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
// See the file "COPYING" in the main distribution directory for copyright.
/***
* This file contains functions to generate hashes using keyed hash functions.
* Keyed hash functions make it difficult/impossible to find information about the
* output of a hash when the key is unknown to the attacker. This fact holds, even
* when the input value is known.
*
* We use these kinds of hashes heavily internally - e.g. for scriptland hash generation.
* It is important that these hashes are not easily guessable to prevent complexity attacks.
*
* The HashKey class is the actual class that is used to generate Hash keys that are used internally,
* e.g. for lookups in hash-tables; the Hashes are also used for connection ID generation.
*
* This means that the hashes created by most functions in this file will be different each run, unless
* a seed file is used. There are a few functions that create hashes that are static over runs
* and use an installation-wide seed value; these are specifically called out.
*/
#pragma once
#include <stdlib.h>
#include "zeek/util.h" // for bro_int_t
// needed to declare md5_hmac_bif below, which is allowed access to the hmac seed
#include "zeek/ZeekArgs.h"
// Forward declarations: this header only uses these types through pointers
// or in function declarations, so their full definitions are not needed here.
namespace zeek { class String; }
namespace zeek::detail {
class Frame;
class BifReturnVal;
}
// Declared here so that KeyedHash can name this function as a friend
// further down in this header.
namespace zeek::BifFunc {
extern zeek::detail::BifReturnVal md5_hmac_bif(zeek::detail::Frame* frame, const zeek::Args*);
}
namespace zeek::detail {
// Hash value types, all built from 64-bit unsigned words: hash_t and
// hash64_t are a single word, hash128_t is an array of two words and
// hash256_t is an array of four words.
using hash_t = uint64_t;
using hash64_t = uint64_t;
using hash128_t = uint64_t[2];
using hash256_t = uint64_t[4];
// Collection of static keyed-hash functions. The "Hash*" functions use
// per-process seeds, the "StaticHash*" functions use an installation-wide
// seed; see the individual function documentation below.
class KeyedHash {
public:
/**
* Generate a 64 bit digest hash.
*
* This hash is seeded with random data, unless the ZEEK_SEED_FILE environment
* variable is set. Thus, typically every node will return a different hash
* after every restart.
*
* This should be used for internal hashes that do not have to be stable over
* the cluster/runs - like, e.g. connection ID generation.
*
* @param bytes Bytes to hash
*
* @param size Size of bytes
*
* @returns 64 bit digest hash
*/
static hash64_t Hash64(const void* bytes, uint64_t size);
/**
* Generate a 128 bit digest hash.
*
* This hash is seeded with random data, unless the ZEEK_SEED_FILE environment
* variable is set. Thus, typically every node will return a different hash
* after every restart.
*
* This should be used for internal hashes that do not have to be stable over
* the cluster/runs - like, e.g. connection ID generation.
*
* @param bytes Bytes to hash
*
* @param size Size of bytes
*
* @param result Output: receives the 128 bit digest.
*/
static void Hash128(const void* bytes, uint64_t size, hash128_t* result);
/**
* Generate a 256 bit digest hash.
*
* This hash is seeded with random data, unless the ZEEK_SEED_FILE environment
* variable is set. Thus, typically every node will return a different hash
* after every restart.
*
* This should be used for internal hashes that do not have to be stable over
* the cluster/runs - like, e.g. connection ID generation.
*
* @param bytes Bytes to hash
*
* @param size Size of bytes
*
* @param result Output: receives the 256 bit digest.
*/
static void Hash256(const void* bytes, uint64_t size, hash256_t* result);
/**
* Generates an installation-specific 64 bit hash.
*
* This function generates a 64 bit digest hash, which is stable over a cluster
* or a restart.
*
* To be more exact - the seed value for this hash is generated from the script-level
* :zeek:see:`digest_salt` constant. The seeds are stable as long as this value
* is not changed.
*
* This should be used for hashes that have to remain stable over the entire
* cluster. For example, file IDs have to be stable over several workers.
*
* @param bytes Bytes to hash
*
* @param size Size of bytes
*
* @returns 64 bit digest hash
*/
static hash64_t StaticHash64(const void* bytes, uint64_t size);
/**
* Generates an installation-specific 128 bit hash.
*
* This function generates a 128 bit digest hash, which is stable over a cluster
* or a restart.
*
* To be more exact - the seed value for this hash is generated from the script-level
* :zeek:see:`digest_salt` constant. The seeds are stable as long as this value
* is not changed.
*
* This should be used for hashes that have to remain stable over the entire
* cluster. For example, file IDs have to be stable over several workers.
*
* @param bytes Bytes to hash
*
* @param size Size of bytes
*
* @param result Output: receives the 128 bit digest.
*/
static void StaticHash128(const void* bytes, uint64_t size, hash128_t* result);
/**
* Generates an installation-specific 256 bit hash.
*
* This function generates a 256 bit digest hash, which is stable over a cluster
* or a restart.
*
* To be more exact - the seed value for this hash is generated from the script-level
* :zeek:see:`digest_salt` constant. The seeds are stable as long as this value
* is not changed.
*
* This should be used for hashes that have to remain stable over the entire
* cluster. For example, file IDs have to be stable over several workers.
*
* @param bytes Bytes to hash
*
* @param size Size of bytes
*
* @param result Output: receives the 256 bit digest.
*/
static void StaticHash256(const void* bytes, uint64_t size, hash256_t* result);
/**
* Size of the initial seed, counted in uint32_t elements (it is the length
* of the array passed to InitializeSeeds).
*/
constexpr static int SEED_INIT_SIZE = 20;
/**
* Initialize the (typically process-specific) seeds. This function is indirectly
* called from main, during early initialization.
*
* @param seed_data random data used as an initial seed
*/
static void InitializeSeeds(const std::array<uint32_t, SEED_INIT_SIZE>& seed_data);
/**
* Returns true if the process-specific seeds have been initialized
*
* @return True if the seeds are initialized
*/
static bool IsInitialized() { return seeds_initialized; }
/**
* Initializes the static hash seeds using the script-level
* :zeek:see:`digest_salt` constant.
*/
static void InitOptions();
private:
// actually HHKey. This key changes each start (unless a seed is specified)
alignas(32) static uint64_t shared_highwayhash_key[4];
// actually HHKey. This key is installation specific and sourced from the digest_salt script-level const.
alignas(32) static uint64_t cluster_highwayhash_key[4];
// actually HH_U64, which has the same type. This key changes each start (unless a seed is specified)
alignas(16) static unsigned long long shared_siphash_key[2];
// This key changes each start (unless a seed is specified)
inline static uint8_t shared_hmac_md5_key[16];
// Value reported by IsInitialized(); starts out false.
inline static bool seeds_initialized = false;
// These functions are granted access to the private keys above.
// NOTE(review): presumably both read shared_hmac_md5_key - confirm
// against their definitions.
friend void util::detail::hmac_md5(size_t size, const unsigned char* bytes, unsigned char digest[16]);
friend BifReturnVal BifFunc::md5_hmac_bif(zeek::detail::Frame* frame, const Args*);
};
// Tags naming the kinds of hash keys. NUM_HASH_KEYS is the number of tags,
// i.e. one more than the last enumerator.
enum HashKeyTag {
    HASH_KEY_INT = 0,
    HASH_KEY_DOUBLE = 1,
    HASH_KEY_STRING = 2
};
constexpr int NUM_HASH_KEYS = static_cast<int>(HASH_KEY_STRING) + 1;
class HashKey {
public:
explicit HashKey(bro_int_t i);
explicit HashKey(bro_uint_t u);
explicit HashKey(uint32_t u);
HashKey(const uint32_t u[], int n);
explicit HashKey(double d);
explicit HashKey(const void* p);
explicit HashKey(const char* s);
explicit HashKey(const String* s);
~HashKey()
{
if ( is_our_dynamic )
delete [] (char *) key;
}
// Create a HashKey given all of its components. "key" is assumed
// to be dynamically allocated and to now belong to this HashKey
// (to delete upon destruct'ing). If "copy_key" is true, it's
// first copied.
//
// The calling sequence here is unusual (normally key would be
// first) to avoid possible ambiguities with the next constructor,
// which is the more commonly used one.
HashKey(int copy_key, void* key, int size);
// Same, but automatically copies the key.
HashKey(const void* key, int size, hash_t hash);
// Builds a key from the given chunk of bytes.
HashKey(const void* bytes, int size);
// Create a Hashkey given all of its components *without*
// copying the key and *without* taking ownership. Note that
// "dont_copy" is a type placeholder to differentiate this member
// function from the one above; its value is not used.
HashKey(const void* key, int size, hash_t hash, bool dont_copy);
// Hands over the key to the caller. This means that if the
// key is our dynamic, we give it to the caller and mark it
// as not our dynamic. If initially it's not our dynamic,
// we give them a copy of it.
void* TakeKey();
const void* Key() const { return key; }
int Size() const { return size; }
hash_t Hash() const { return hash; }
unsigned int MemoryAllocation() const { return padded_sizeof(*this) + util::pad_size(size); }
static hash_t HashBytes(const void* bytes, int size);
protected:
void* CopyKey(const void* key, int size) const;
union {
bro_int_t i;
uint32_t u32;
double d;
const void* p;
} key_u;
void* key;
hash_t hash;
int size;
bool is_our_dynamic = false;
};
// Declared here and defined in another translation unit.
// NOTE(review): presumably performs one-time setup of the hashing
// machinery - confirm against the definition.
void init_hash_function();
} // namespace zeek::detail