Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Grid index #113

Open
wants to merge 9 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ cc_library(
"include/phtree/distance.h",
"include/phtree/filter.h",
"include/phtree/phtree.h",
"include/phtree/phtree_grid_index.h",
"include/phtree/phtree_multimap.h",
],
includes = [
Expand Down
7 changes: 6 additions & 1 deletion TODO.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,12 @@ Ideas that didn't work
Counting showed that PQ would go 3-5 nodes deep (100K:3, 10M: 5) but that had no effect.
Lesson: Look at WQ initialization, it may be too expensive. Why is WQ traversal so slow???


#XX Grid-index: This works reasonably well but needs more testing:
- as one can expect, update() works less well on highly clustered data, but for_each() works 2x-3x better!
- It appears to work better for large N, but that is probably heavily dependent on the chosen grid size.
- TODO: KNN requires a distance function API that takes an "Entry" as a parameter.
Lesson: Overall GOOD!
-> Look further into this!

Fix const-ness
==============
Expand Down
58 changes: 53 additions & 5 deletions benchmark/query_mm_d_benchmark.cc
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include "benchmark_util.h"
#include "logging.h"
#include "phtree/phtree.h"
#include "phtree/phtree_grid_index.h"
#include "phtree/phtree_multimap.h"
#include <benchmark/benchmark.h>
#include <random>
Expand All @@ -32,11 +33,12 @@ namespace {

const double GLOBAL_MAX = 10000;

enum Scenario { TREE_WITH_MAP, MULTI_MAP, MULTI_MAP_STD };
// Index variants compared by this benchmark. The value is used as the SCENARIO
// template argument; TestMap compares it against MULTI_MAP and GRID_INDEX to
// select the concrete index type.
enum Scenario { TREE_WITH_MAP, MULTI_MAP, MULTI_MAP_STD, GRID_INDEX };

using TestPoint = PhPointD<3>;
using QueryBox = PhBoxD<3>;
using payload_t = TestPoint;
// using payload_t = TestPoint; // TODO!?!?
using payload_t = size_t;
using BucketType = std::set<payload_t>;

struct Query {
Expand All @@ -53,7 +55,29 @@ using TestMap = typename std::conditional_t<
typename std::conditional_t<
SCENARIO == MULTI_MAP,
PhTreeMultiMapD<DIM, payload_t, CONVERTER<SCENARIO, DIM>, b_plus_tree_hash_set<payload_t>>,
PhTreeMultiMapD<DIM, payload_t, CONVERTER<SCENARIO, DIM>, std::unordered_set<payload_t>>>>;
typename std::conditional_t<
SCENARIO == GRID_INDEX,
PhTreeGridIndex<DIM, payload_t>,
PhTreeMultiMapD<
DIM,
payload_t,
CONVERTER<SCENARIO, DIM>,
std::unordered_set<payload_t>>>>>;

// Creates the index for the GRID_INDEX scenario; this overload is only enabled
// (via SFINAE) when SCENARIO == Scenario::GRID_INDEX.
//
// The index is constructed with the value GLOBAL_MAX * (10 / n)^(1 / DIM), which
// shrinks as the number of entries `n` grows.
// NOTE(review): presumably this is the edge length of a grid cell, sized so that
// uniformly distributed data yields ~10 entries per cell -- confirm against the
// PhTreeGridIndex constructor.
//
// n:     expected number of entries; used as a divisor, so it should be > 0.
// dummy: SFINAE helper only, never read.
template <dimension_t DIM, Scenario SCENARIO>
TestMap<SCENARIO, DIM> CreateTree(
    size_t n, typename std::enable_if_t<SCENARIO == Scenario::GRID_INDEX>* dummy = 0) {
    using Tree = TestMap<SCENARIO, DIM>;
    static_cast<void>(dummy);  // silence "unused parameter" warnings

    const double ratio = 10. / static_cast<double>(n);
    const double exponent = 1. / static_cast<double>(DIM);
    const double edge_len = GLOBAL_MAX * pow(ratio, exponent);
    return Tree(edge_len);
}

// Creates the index for every scenario except GRID_INDEX; this overload is only
// enabled (via SFINAE) when SCENARIO != Scenario::GRID_INDEX.
//
// The entry count (first parameter) is ignored: the index is value-initialized
// without constructor arguments.
// dummy: SFINAE helper only, never read.
template <dimension_t DIM, Scenario SCENARIO>
TestMap<SCENARIO, DIM> CreateTree(
    size_t, typename std::enable_if_t<SCENARIO != Scenario::GRID_INDEX>* dummy = 0) {
    using Tree = TestMap<SCENARIO, DIM>;
    static_cast<void>(dummy);  // silence "unused parameter" warnings
    return Tree();
}

template <dimension_t DIM, Scenario SCENARIO>
class IndexBenchmark {
Expand Down Expand Up @@ -86,7 +110,7 @@ IndexBenchmark<DIM, SCENARIO>::IndexBenchmark(benchmark::State& state, double av
: data_type_{static_cast<TestGenerator>(state.range(1))}
, num_entities_(state.range(0))
, avg_query_result_size_(avg_query_result_size)
, tree_{}
, tree_{CreateTree<DIM, SCENARIO>(num_entities_)}
, random_engine_{1}
, cube_distribution_{0, GLOBAL_MAX}
, points_(state.range(0)) {
Expand Down Expand Up @@ -121,6 +145,12 @@ void InsertEntry(
tree.emplace(point, data);
}

// Inserts one entry into the grid-index variant of the test map (GRID_INDEX
// scenario) by forwarding the key `point` and the payload `data` to emplace().
// Any value returned by emplace() is discarded.
template <dimension_t DIM>
void InsertEntry(
TestMap<Scenario::GRID_INDEX, DIM>& tree, const PhPointD<DIM>& point, payload_t data) {
tree.emplace(point, data);
}

template <dimension_t DIM>
void InsertEntry(
TestMap<Scenario::MULTI_MAP_STD, DIM>& tree,
Expand Down Expand Up @@ -154,6 +184,13 @@ typename std::enable_if<SCENARIO == Scenario::TREE_WITH_MAP, size_t>::type Count
return counter.n_;
}

// Runs the window query `query.box` on the grid-index variant of the test map
// (GRID_INDEX scenario) and returns the counter's final value.
//
// SCENARIO cannot be deduced from the arguments, so callers have to pass the
// template arguments explicitly.
template <dimension_t DIM, Scenario SCENARIO>
size_t CountEntries(TestMap<Scenario::GRID_INDEX, DIM>& tree, const Query& query) {
    // The counter starts at zero and is handed to for_each() as the callback.
    CounterMultiMap hit_counter{0};
    tree.for_each(query.box, hit_counter);
    return hit_counter.n_;
}

template <dimension_t DIM, Scenario SCENARIO>
size_t CountEntries(TestMap<Scenario::MULTI_MAP, DIM>& tree, const Query& query) {
CounterMultiMap counter{0};
Expand All @@ -174,7 +211,7 @@ void IndexBenchmark<DIM, SCENARIO>::SetupWorld(benchmark::State& state) {
// create data with about 10% duplicate coordinates
CreatePointData<DIM>(points_, data_type_, num_entities_, 0, GLOBAL_MAX, 0.1);
for (size_t i = 0; i < num_entities_; ++i) {
InsertEntry(tree_, points_[i], points_[i]);
InsertEntry(tree_, points_[i], i); // points_[i]);
}

state.counters["query_rate"] = benchmark::Counter(0, benchmark::Counter::kIsRate);
Expand Down Expand Up @@ -218,6 +255,12 @@ void PhTreeMultiMapStd3D(benchmark::State& state, Arguments&&... arguments) {
benchmark.Benchmark(state);
}

// Benchmark entry point for the 3-dimensional GRID_INDEX scenario: builds an
// IndexBenchmark from `state` plus the captured arguments and runs it.
template <typename... Arguments>
void PhTreeGI3D(benchmark::State& state, Arguments&&... arguments) {
    using Runner = IndexBenchmark<3, Scenario::GRID_INDEX>;
    Runner runner{state, arguments...};
    runner.Benchmark(state);
}

template <typename... Arguments>
void PhTree3D(benchmark::State& state, Arguments&&... arguments) {
IndexBenchmark<3, Scenario::TREE_WITH_MAP> benchmark{state, arguments...};
Expand All @@ -231,6 +274,11 @@ BENCHMARK_CAPTURE(PhTreeMultiMap3D, WQ_100, 100.0)
->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}})
->Unit(benchmark::kMillisecond);

// PhTreeGridIndex
// Registers PhTreeGI3D with the captured argument 100.0, using the same ranges
// and time unit as the other WQ_100 registrations in this file.
BENCHMARK_CAPTURE(PhTreeGI3D, WQ_100, 100.0)
->RangeMultiplier(10)
->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}})
->Unit(benchmark::kMillisecond);

// PhTreeMultiMap
BENCHMARK_CAPTURE(PhTreeMultiMapStd3D, WQ_100, 100.0)
->RangeMultiplier(10)
Expand Down
50 changes: 47 additions & 3 deletions benchmark/update_mm_d_benchmark.cc
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include "benchmark_util.h"
#include "logging.h"
#include "phtree/phtree.h"
#include "phtree/phtree_grid_index.h"
#include "phtree/phtree_multimap.h"
#include <benchmark/benchmark.h>
#include <random>
Expand All @@ -34,7 +35,13 @@ std::vector<double> MOVE_DISTANCE = {0, 1.0, 10};

const double GLOBAL_MAX = 10000;

enum Scenario { ERASE_EMPLACE, MM_BPT_RELOCATE, MM_SET_RELOCATE, MM_SET_RELOCATE_IF };
// Update strategies / index variants compared by this benchmark. The value is
// used as the SCENARIO template argument: TestMap uses it to select the index
// type and the UpdateEntry overloads are enabled depending on it.
enum Scenario {
ERASE_EMPLACE,
// PhTreeMultiMapD with b_plus_tree_hash_set buckets.
MM_BPT_RELOCATE,
// PhTreeGridIndexD.
GI_BPT_RELOCATE,
// NOTE(review): the remaining two appear to fall through to PhTreeMultiMapD
// with std::set buckets in TestMap -- confirm against the full definition.
MM_SET_RELOCATE,
MM_SET_RELOCATE_IF
};

using payload_t = scalar_64_t;

Expand All @@ -53,7 +60,10 @@ using TestMap = typename std::conditional_t<
typename std::conditional_t<
SCENARIO == MM_BPT_RELOCATE,
PhTreeMultiMapD<DIM, payload_t, CONVERTER<SCENARIO, DIM>, b_plus_tree_hash_set<payload_t>>,
PhTreeMultiMapD<DIM, payload_t, CONVERTER<SCENARIO, DIM>, std::set<payload_t>>>>;
typename std::conditional_t<
SCENARIO == GI_BPT_RELOCATE,
PhTreeGridIndexD<DIM, payload_t>,
PhTreeMultiMapD<DIM, payload_t, CONVERTER<SCENARIO, DIM>, std::set<payload_t>>>>>;

template <dimension_t DIM>
struct UpdateOp {
Expand All @@ -62,6 +72,21 @@ struct UpdateOp {
PointType<DIM> new_;
};

// Creates the index for the GI_BPT_RELOCATE scenario; this overload is only
// enabled (via SFINAE) when SCENARIO == Scenario::GI_BPT_RELOCATE.
//
// The index is constructed with the value GLOBAL_MAX * (10 / n)^(1 / DIM), which
// shrinks as the number of entries `n` grows.
// NOTE(review): presumably this is the edge length of a grid cell, sized so that
// uniformly distributed data yields ~10 entries per cell -- confirm against the
// PhTreeGridIndexD constructor.
//
// n:     expected number of entries; used as a divisor, so it should be > 0.
// dummy: SFINAE helper only, never read.
template <dimension_t DIM, Scenario SCENARIO>
TestMap<SCENARIO, DIM> CreateTree(
    size_t n, typename std::enable_if_t<SCENARIO == Scenario::GI_BPT_RELOCATE>* dummy = 0) {
    using Tree = TestMap<SCENARIO, DIM>;
    static_cast<void>(dummy);  // silence "unused parameter" warnings

    const double ratio = 10. / static_cast<double>(n);
    const double exponent = 1. / static_cast<double>(DIM);
    const double edge_len = GLOBAL_MAX * pow(ratio, exponent);
    return Tree(edge_len);
}

// Creates the index for every scenario except GI_BPT_RELOCATE; this overload is
// only enabled (via SFINAE) when SCENARIO != Scenario::GI_BPT_RELOCATE.
//
// The entry count (first parameter) is ignored: the index is value-initialized
// without constructor arguments.
// dummy: SFINAE helper only, never read.
template <dimension_t DIM, Scenario SCENARIO>
TestMap<SCENARIO, DIM> CreateTree(
    size_t, typename std::enable_if_t<SCENARIO != Scenario::GI_BPT_RELOCATE>* dummy = 0) {
    using Tree = TestMap<SCENARIO, DIM>;
    static_cast<void>(dummy);  // silence "unused parameter" warnings
    return Tree();
}

template <dimension_t DIM, Scenario SCENARIO>
class IndexBenchmark {
public:
Expand Down Expand Up @@ -96,6 +121,7 @@ IndexBenchmark<DIM, SCENARIO>::IndexBenchmark(
, num_entities_(state.range(0))
, updates_per_round_(updates_per_round)
, move_distance_(std::move(move_distance))
, tree_{CreateTree<DIM, SCENARIO>(num_entities_)}
, points_(num_entities_)
, updates_(updates_per_round)
, random_engine_{0}
Expand Down Expand Up @@ -128,6 +154,12 @@ void InsertEntry(
tree.emplace(point, data);
}

// Inserts one entry into the grid-index variant of the test map
// (GI_BPT_RELOCATE scenario) by forwarding the key `point` and the payload
// `data` to emplace(). Any value returned by emplace() is discarded.
template <dimension_t DIM>
void InsertEntry(
TestMap<Scenario::GI_BPT_RELOCATE, DIM>& tree, const PointType<DIM>& point, payload_t data) {
tree.emplace(point, data);
}

template <dimension_t DIM>
void InsertEntry(
TestMap<Scenario::MM_SET_RELOCATE, DIM>& tree, const PointType<DIM>& point, payload_t data) {
Expand Down Expand Up @@ -161,7 +193,8 @@ typename std::enable_if<SCENARIO == Scenario::ERASE_EMPLACE, size_t>::type Updat

template <dimension_t DIM, Scenario SCENARIO>
typename std::enable_if<
SCENARIO == Scenario::MM_BPT_RELOCATE || SCENARIO == Scenario::MM_SET_RELOCATE,
SCENARIO == Scenario::MM_BPT_RELOCATE || SCENARIO == Scenario::MM_SET_RELOCATE ||
SCENARIO == Scenario::GI_BPT_RELOCATE,
size_t>::type
UpdateEntry(TestMap<SCENARIO, DIM>& tree, std::vector<UpdateOp<DIM>>& updates) {
size_t n = 0;
Expand Down Expand Up @@ -246,6 +279,12 @@ void PhTreeMMRelocateBpt3D(benchmark::State& state, Arguments&&... arguments) {
benchmark.Benchmark(state);
}

// Benchmark entry point for the 3-dimensional GI_BPT_RELOCATE scenario: builds
// an IndexBenchmark from `state` plus the captured arguments and runs it.
template <typename... Arguments>
void PhTreeGIRelocateBpt3D(benchmark::State& state, Arguments&&... arguments) {
    using Runner = IndexBenchmark<3, Scenario::GI_BPT_RELOCATE>;
    Runner runner{state, arguments...};
    runner.Benchmark(state);
}

template <typename... Arguments>
void PhTreeMMRelocateStdSet3D(benchmark::State& state, Arguments&&... arguments) {
IndexBenchmark<3, Scenario::MM_SET_RELOCATE> benchmark{state, arguments...};
Expand All @@ -271,6 +310,11 @@ BENCHMARK_CAPTURE(PhTreeMMRelocateBpt3D, UPDATE_1000, UPDATES_PER_ROUND)
->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}})
->Unit(benchmark::kMillisecond);

// PhTreeGridIndexD
// Registers PhTreeGIRelocateBpt3D with UPDATES_PER_ROUND as captured argument,
// using the same ranges and time unit as the other UPDATE_1000 registrations.
BENCHMARK_CAPTURE(PhTreeGIRelocateBpt3D, UPDATE_1000, UPDATES_PER_ROUND)
->RangeMultiplier(10)
->Ranges({{1000, 1000 * 1000}, {TestGenerator::CUBE, TestGenerator::CLUSTER}})
->Unit(benchmark::kMillisecond);

// PhTreeMultiMap with std::set
BENCHMARK_CAPTURE(PhTreeMMRelocateStdSet3D, UPDATE_1000, UPDATES_PER_ROUND)
->RangeMultiplier(10)
Expand Down
Loading