forked from marbl/Mash
-
Notifications
You must be signed in to change notification settings - Fork 0
/
MinHashHeap.h
49 lines (36 loc) · 1.45 KB
/
MinHashHeap.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
#ifndef HashHeapCounted_h
#define HashHeapCounted_h
#include "HashList.h"
#include "HashPriorityQueue.h"
#include "HashSet.h"
#include <math.h>
#include "bloom_filter.hpp"
// MinHashHeap: a container of hash values that supports insertion
// (tryInsert), export to a HashList, and estimates of set size and mean
// multiplicity. Only the four small inline members defined after this class
// are implemented in this header; the rest is defined elsewhere.
//
// NOTE(review): the class declares a destructor and holds a raw
// bloom_filter pointer, but declares no copy constructor or copy assignment.
// If the destructor frees bloomFilter, copying an instance would lead to a
// double delete -- confirm ownership in the implementation file.
class MinHashHeap
{
public:

    // use64New               -- presumably selects 64-bit rather than 32-bit
    //                           hashes (see use64) -- TODO confirm.
    // cardinalityMaximumNew  -- presumably the cap on retained hashes -- TODO confirm.
    // multiplicityMinimumNew -- defaults to 1; presumably the number of
    //                           sightings required before a hash is kept -- TODO confirm.
    // memoryBoundBytes       -- defaults to 0; meaning not visible in this header.
    MinHashHeap(bool use64New, uint64_t cardinalityMaximumNew, uint64_t multiplicityMinimumNew = 1, uint64_t memoryBoundBytes = 0);
    ~MinHashHeap();

    // Mutators (defined outside this header).
    void clear();
    void tryInsert(hash_u hash);

    // Estimates derived from the current contents (defined inline below the class).
    double estimateMultiplicity() const;
    double estimateSetSize() const;

    // Export of the current contents, with or without per-hash counts.
    void toHashList(HashList & hashList, std::vector<uint32_t> & counts) const;
    void toHashList(HashList & hashList) const;

private:

    // NOTE: the order of the data members below is kept as originally
    // declared, because it determines their initialisation order.

    // Chooses between the hash64 and hash32 fields (and between 2^64 and
    // 2^32) in estimateSetSize().
    bool use64;

    // Source of size() for both estimates and of the toHashList() exports.
    HashSet hashes;

    // Its top() supplies the divisor in estimateSetSize().
    HashPriorityQueue hashesQueue;

    // Not referenced in this header; presumably hold hashes that have not yet
    // reached multiplicityMinimum -- TODO confirm.
    HashSet hashesPending;
    HashPriorityQueue hashesQueuePending;

    // Presumably copies of the matching constructor arguments -- TODO confirm.
    uint64_t cardinalityMaximum;
    uint64_t multiplicityMinimum;

    // Numerator of estimateMultiplicity().
    uint64_t multiplicitySum;

    // Raw pointer; allocation and ownership are not visible in this header.
    bloom_filter * bloomFilter;

    // Not referenced in this header; presumably insertion statistics -- TODO confirm.
    uint64_t kmersTotal;
    uint64_t kmersUsed;
};
// Returns multiplicitySum divided by the number of entries in `hashes`,
// or 0 when `hashes` is empty (which also avoids a division by zero).
inline double MinHashHeap::estimateMultiplicity() const
{
    if ( ! hashes.size() )
    {
        return 0;
    }

    return (double)multiplicitySum / hashes.size();
}
// Returns (size of the hash space) * (number of entries in `hashes`) divided
// by the hash at the top of `hashesQueue`, or 0 when `hashes` is empty.
// The hash space is 2^64 when use64 is set and 2^32 otherwise.
// NOTE(review): the top of the queue is presumably the largest retained
// hash -- confirm against HashPriorityQueue.
inline double MinHashHeap::estimateSetSize() const
{
    if ( ! hashes.size() )
    {
        return 0;
    }

    double hashSpace;
    double topHash;

    if ( use64 )
    {
        hashSpace = pow(2.0, 64.0);
        topHash = (double)hashesQueue.top().hash64;
    }
    else
    {
        hashSpace = pow(2.0, 32.0);
        topHash = (double)hashesQueue.top().hash32;
    }

    // Multiply before dividing, matching the original evaluation order.
    return hashSpace * (double)hashes.size() / topHash;
}
// Forwards to HashSet::toHashList on `hashes`, passing through both the
// destination hash list and the counts vector.
inline void MinHashHeap::toHashList(HashList & hashList, std::vector<uint32_t> & counts) const
{
    hashes.toHashList(hashList, counts);
}
// Forwards to HashSet::toHashList on `hashes`, passing through the
// destination hash list (overload without counts).
inline void MinHashHeap::toHashList(HashList & hashList) const
{
    hashes.toHashList(hashList);
}
#endif