Skip to content

Commit

Permalink
chore: bloom test - cover corner cases (dragonflydb#2806)
Browse files Browse the repository at this point in the history
Signed-off-by: Roman Gershman <[email protected]>
  • Loading branch information
romange authored Apr 2, 2024
1 parent d3b90c8 commit a93ad4e
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 10 deletions.
23 changes: 17 additions & 6 deletions src/core/bloom.cc
Original file line number Diff line number Diff line change
Expand Up @@ -51,18 +51,17 @@ void Bloom::Init(uint64_t entries, double fp_prob, PMR_NS::memory_resource* heap
CHECK(bf_ == nullptr);
CHECK(fp_prob > 0 && fp_prob < 1);

if (entries < 1024)
entries = 1024;

if (fp_prob > 0.5)
fp_prob = 0.5;
double bpe = BPE(fp_prob);

hash_cnt_ = ceil(M_LN2 * bpe);

uint64_t bits = uint64_t(ceil(entries * bpe));
bits = absl::bit_ceil(bits); // make it power of 2.
if (bits < 1024) {
bits = 1024;
if (bits < 512) {
bits = 512;
}
bits = absl::bit_ceil(bits); // make it power of 2.

uint64_t length = bits / 8;
bf_ = (uint8_t*)heap->allocate(length);
Expand Down Expand Up @@ -111,6 +110,8 @@ bool Bloom::Add(const uint64_t fp[2]) {
}

// Returns the maximal number of elements this filter can hold for the given
// false-positive probability, computed as floor(bitlen() / bpe) where bpe
// (bits per element) is derived from fp_prob via BPE().
// Probabilities above 0.5 are treated as 0.5, matching the clamp applied in Init().
size_t Bloom::Capacity(double fp_prob) const {
  const double effective_prob = (fp_prob > 0.5) ? 0.5 : fp_prob;
  const double bits_per_element = BPE(effective_prob);

  return floor(bitlen() / bits_per_element);
}
Expand Down Expand Up @@ -197,4 +198,14 @@ bool SBF::Exists(std::string_view str) const {
return any_of(filters_.crbegin(), filters_.crend(), exists);
}

size_t SBF::MallocUsed() const {
size_t res = filters_.capacity() * sizeof(Bloom);
for (const auto& b : filters_) {
res += (b.bitlen() / 8);
}
res += sizeof(SBF);

return res;
}

} // namespace dfly
14 changes: 13 additions & 1 deletion src/core/bloom.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,8 @@ class Bloom {
return 1ULL << bit_log_;
}

// Note that max element capacity is floor(bit_len / bpe), where bpe (bits per element) is
// Max element capacity for this bloom filter.
// Note that capacity is floor(bit_len / bpe), where bpe (bits per element) is
// derived from fp_prob.
size_t Capacity(double fp_prob) const;

Expand Down Expand Up @@ -83,12 +84,23 @@ class SBF {
bool Add(std::string_view str);
bool Exists(std::string_view str) const;

// Returns the sum of prev_size_ and current_size_.
// NOTE(review): presumably the total number of items added across all
// filters (older filters + the current one) - confirm against SBF::Add.
size_t GetSize() const {
  const size_t total = current_size_ + prev_size_;
  return total;
}

size_t MallocUsed() const;

// Returns the stored grow_factor_ value.
// NOTE(review): presumably the multiplier used to size each newly added
// filter relative to the previous one - confirm against the SBF constructor/Add.
double grow_factor() const {
return grow_factor_;
}

private:
// multiple filters from the smallest to the largest.
std::vector<Bloom, PMR_NS::polymorphic_allocator<Bloom>> filters_;
double grow_factor_;
double fp_prob_;
size_t current_size_ = 0;
size_t prev_size_ = 0;
size_t max_capacity_;
};

Expand Down
18 changes: 15 additions & 3 deletions src/core/bloom_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -61,19 +61,31 @@ TEST_F(BloomTest, ErrorBound) {
EXPECT_EQ(collisions, 0) << max_capacity;
}

// Corner case: a tiny requested capacity combined with an unreasonably large
// error probability must still yield the minimal 512-bit filter.
TEST_F(BloomTest, Extreme) {
  auto* mr = PMR_NS::get_default_resource();
  Bloom filter;

  // 0.999 is far above any sensible false-positive rate.
  filter.Init(10, 0.999, mr);

  // The bit length is floored at 512 even though only 10 entries were requested.
  EXPECT_EQ(512, filter.bitlen());

  // The element capacity must remain below the number of bits.
  EXPECT_LT(filter.Capacity(0.999), 512);

  filter.Destroy(mr);
}

// Adds kNumElems distinct keys ("item0", "item1", ...) to an SBF and counts
// every Add() call that returns false as a collision, then checks that the
// collision count stays within a small fraction of kNumElems.
TEST_F(BloomTest, SBF) {
SBF sbf(10, 0.001, 2, PMR_NS::get_default_resource());

unsigned collisions = 0;
constexpr unsigned kNumElems = 1000000;
constexpr unsigned kNumElems = 2000000;
for (unsigned i = 0; i < kNumElems; ++i) {
if (!sbf.Add(absl::StrCat("item", i))) {
++collisions;
}
}

// TODO: I should revisit the math for error bound computation.
EXPECT_LE(collisions, kNumElems * 0.0015);
// TODO: revisit the math for deriving the number of hash functions for each filter
// according to the SBF paper.
EXPECT_LE(collisions, kNumElems * 0.008);
}

static void BM_BloomExist(benchmark::State& state) {
Expand Down

0 comments on commit a93ad4e

Please sign in to comment.