Skip to content

Commit

Permalink
Extend KeyCondition interface so that it returns BoolMask.
Browse files Browse the repository at this point in the history
  • Loading branch information
zlobober committed Jan 29, 2020
1 parent 8aa6a21 commit 2d3321e
Show file tree
Hide file tree
Showing 9 changed files with 130 additions and 39 deletions.
2 changes: 1 addition & 1 deletion dbms/src/Interpreters/Set.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -481,7 +481,7 @@ MergeTreeSetIndex::MergeTreeSetIndex(const Columns & set_elements, std::vector<K
* 1: the intersection of the set and the range is non-empty
* 2: the range contains elements not in the set
*/
BoolMask MergeTreeSetIndex::mayBeTrueInRange(const std::vector<Range> & key_ranges, const DataTypes & data_types)
BoolMask MergeTreeSetIndex::checkInRange(const std::vector<Range> & key_ranges, const DataTypes & data_types)
{
size_t tuple_size = indexes_mapping.size();

Expand Down
4 changes: 2 additions & 2 deletions dbms/src/Interpreters/Set.h
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ using Sets = std::vector<SetPtr>;
class IFunction;
using FunctionPtr = std::shared_ptr<IFunction>;

/// Class for mayBeTrueInRange function.
/// Class for checkInRange function.
class MergeTreeSetIndex
{
public:
Expand All @@ -199,7 +199,7 @@ class MergeTreeSetIndex

size_t size() const { return ordered_set.at(0)->size(); }

BoolMask mayBeTrueInRange(const std::vector<Range> & key_ranges, const DataTypes & data_types);
BoolMask checkInRange(const std::vector<Range> & key_ranges, const DataTypes & data_types);

private:
Columns ordered_set;
Expand Down
5 changes: 5 additions & 0 deletions dbms/src/Storages/MergeTree/BoolMask.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#include "BoolMask.h"


/// Out-of-line definitions of the static constants declared in BoolMask.h.
/// NOTE(review): the BoolMask constructor is not visible in this diff, so which positional
/// argument maps to can_be_true and which to can_be_false should be confirmed against BoolMask.h.
const BoolMask BoolMask::consider_only_can_be_true(false, true);
const BoolMask BoolMask::consider_only_can_be_false(true, false);
13 changes: 13 additions & 0 deletions dbms/src/Storages/MergeTree/BoolMask.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,17 @@ struct BoolMask
{
return BoolMask(can_be_false, can_be_true);
}

/// Tells whether both components of the mask (can_be_true and can_be_false) are set.
/// Such a (true, true) mask is not changed any further by operation |, so
/// KeyCondition::check{InRange,After} use this test to exit early.
bool isComplete() const
{
    return can_be_true && can_be_false;
}

/// These special constants are used to implement KeyCondition::mayBeTrue{InRange,After} via KeyCondition::check{InRange,After}.
/// When used as an initial_mask argument in KeyCondition::check{InRange,After} methods, they effectively prevent
/// calculation of discarded BoolMask component as it is already set to true.
static const BoolMask consider_only_can_be_true;
static const BoolMask consider_only_can_be_false;
};
88 changes: 66 additions & 22 deletions dbms/src/Storages/MergeTree/KeyCondition.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -888,14 +888,15 @@ String KeyCondition::toString() const
*/

template <typename F>
static bool forAnyParallelogram(
static BoolMask forAnyParallelogram(
size_t key_size,
const Field * key_left,
const Field * key_right,
bool left_bounded,
bool right_bounded,
std::vector<Range> & parallelogram,
size_t prefix_size,
BoolMask initial_mask,
F && callback)
{
if (!left_bounded && !right_bounded)
Expand Down Expand Up @@ -944,37 +945,51 @@ static bool forAnyParallelogram(
for (size_t i = prefix_size + 1; i < key_size; ++i)
parallelogram[i] = Range();

if (callback(parallelogram))
return true;

BoolMask result = initial_mask;
result = result | callback(parallelogram);

/// There are several early-exit checks in this function; the one below is the first of them.
/// They are important; in particular, if initial_mask == BoolMask::consider_only_can_be_true
/// (which happens when this routine is called from KeyCondition::mayBeTrueXXX),
/// they provide significant speedup, which may be observed on merge_tree_huge_pk performance test.
if (result.isComplete()) {
return result;
}

/// [x1] x [y1 .. +inf)

if (left_bounded)
{
parallelogram[prefix_size] = Range(key_left[prefix_size]);
if (forAnyParallelogram(key_size, key_left, key_right, true, false, parallelogram, prefix_size + 1, callback))
return true;
result = result | forAnyParallelogram(key_size, key_left, key_right, true, false, parallelogram, prefix_size + 1, initial_mask, callback);
if (result.isComplete()) {
return result;
}
}

/// [x2] x (-inf .. y2]

if (right_bounded)
{
parallelogram[prefix_size] = Range(key_right[prefix_size]);
if (forAnyParallelogram(key_size, key_left, key_right, false, true, parallelogram, prefix_size + 1, callback))
return true;
result = result | forAnyParallelogram(key_size, key_left, key_right, false, true, parallelogram, prefix_size + 1, initial_mask, callback);
if (result.isComplete()) {
return result;
}
}

return false;
return result;
}


bool KeyCondition::mayBeTrueInRange(
BoolMask KeyCondition::checkInRange(
size_t used_key_size,
const Field * left_key,
const Field * right_key,
const DataTypes & data_types,
bool right_bounded) const
bool right_bounded,
BoolMask initial_mask) const
{
std::vector<Range> key_ranges(used_key_size, Range());

Expand All @@ -992,10 +1007,10 @@ bool KeyCondition::mayBeTrueInRange(
else
std::cerr << "+inf)\n";*/

return forAnyParallelogram(used_key_size, left_key, right_key, true, right_bounded, key_ranges, 0,
return forAnyParallelogram(used_key_size, left_key, right_key, true, right_bounded, key_ranges, 0, initial_mask,
[&] (const std::vector<Range> & key_ranges_parallelogram)
{
auto res = mayBeTrueInParallelogram(key_ranges_parallelogram, data_types);
auto res = checkInParallelogram(key_ranges_parallelogram, data_types);

/* std::cerr << "Parallelogram: ";
for (size_t i = 0, size = key_ranges.size(); i != size; ++i)
Expand All @@ -1006,11 +1021,11 @@ bool KeyCondition::mayBeTrueInRange(
});
}


std::optional<Range> KeyCondition::applyMonotonicFunctionsChainToRange(
Range key_range,
MonotonicFunctionsChain & functions,
DataTypePtr current_type
)
DataTypePtr current_type)
{
for (auto & func : functions)
{
Expand Down Expand Up @@ -1043,7 +1058,9 @@ std::optional<Range> KeyCondition::applyMonotonicFunctionsChainToRange(
return key_range;
}

bool KeyCondition::mayBeTrueInParallelogram(const std::vector<Range> & parallelogram, const DataTypes & data_types) const
BoolMask KeyCondition::checkInParallelogram(
const std::vector<Range> & parallelogram,
const DataTypes & data_types) const
{
std::vector<BoolMask> rpn_stack;
for (size_t i = 0; i < rpn.size(); ++i)
Expand Down Expand Up @@ -1091,7 +1108,7 @@ bool KeyCondition::mayBeTrueInParallelogram(const std::vector<Range> & parallelo
if (!element.set_index)
throw Exception("Set for IN is not created yet", ErrorCodes::LOGICAL_ERROR);

rpn_stack.emplace_back(element.set_index->mayBeTrueInRange(parallelogram, data_types));
rpn_stack.emplace_back(element.set_index->checkInRange(parallelogram, data_types));
if (element.function == RPNElement::FUNCTION_NOT_IN_SET)
rpn_stack.back() = !rpn_stack.back();
}
Expand Down Expand Up @@ -1132,22 +1149,49 @@ bool KeyCondition::mayBeTrueInParallelogram(const std::vector<Range> & parallelo
}

if (rpn_stack.size() != 1)
throw Exception("Unexpected stack size in KeyCondition::mayBeTrueInParallelogram", ErrorCodes::LOGICAL_ERROR);
throw Exception("Unexpected stack size in KeyCondition::checkInRange", ErrorCodes::LOGICAL_ERROR);

return rpn_stack[0].can_be_true;
return rpn_stack[0];
}


/// Public overload for a key range with both a left and a right key.
/// Forwards to the overload that takes an explicit right_bounded flag, passing true for it.
BoolMask KeyCondition::checkInRange(
    size_t used_key_size,
    const Field * left_key,
    const Field * right_key,
    const DataTypes & data_types,
    BoolMask initial_mask) const
{
    /// A right key is supplied, so the range is treated as bounded on the right.
    const bool right_bounded = true;
    return checkInRange(used_key_size, left_key, right_key, data_types, right_bounded, initial_mask);
}


bool KeyCondition::mayBeTrueInRange(
size_t used_key_size, const Field * left_key, const Field * right_key, const DataTypes & data_types) const
size_t used_key_size,
const Field * left_key,
const Field * right_key,
const DataTypes & data_types) const
{
return mayBeTrueInRange(used_key_size, left_key, right_key, data_types, true);
return checkInRange(used_key_size, left_key, right_key, data_types, true, BoolMask::consider_only_can_be_true).can_be_true;
}


/// Checks the condition on a key range that starts at left_key and has no right key.
/// Forwards to checkInRange with a null right key and right_bounded set to false.
BoolMask KeyCondition::checkAfter(
    size_t used_key_size,
    const Field * left_key,
    const DataTypes & data_types,
    BoolMask initial_mask) const
{
    /// There is no right key for a range that is not limited on the right.
    const Field * const absent_right_key = nullptr;
    const bool right_bounded = false;
    return checkInRange(used_key_size, left_key, absent_right_key, data_types, right_bounded, initial_mask);
}


bool KeyCondition::mayBeTrueAfter(
size_t used_key_size, const Field * left_key, const DataTypes & data_types) const
size_t used_key_size,
const Field * left_key,
const DataTypes & data_types) const
{
return mayBeTrueInRange(used_key_size, left_key, nullptr, data_types, false);
return checkInRange(used_key_size, left_key, nullptr, data_types, false, BoolMask::consider_only_can_be_true).can_be_true;
}


Expand Down
47 changes: 38 additions & 9 deletions dbms/src/Storages/MergeTree/KeyCondition.h
Original file line number Diff line number Diff line change
Expand Up @@ -235,17 +235,45 @@ class KeyCondition
const Names & key_column_names,
const ExpressionActionsPtr & key_expr);

/// Whether the condition is feasible in the key range.
/// Whether the condition and its negation are feasible in the direct product of single column ranges specified by `parallelogram`.
BoolMask checkInParallelogram(
const std::vector<Range> & parallelogram,
const DataTypes & data_types) const;

/// Whether the condition and its negation are (independently) feasible in the key range.
/// left_key and right_key must contain all fields in the sort_descr in the appropriate order.
/// data_types - the types of the key columns.
bool mayBeTrueInRange(size_t used_key_size, const Field * left_key, const Field * right_key, const DataTypes & data_types) const;

/// Whether the condition is feasible in the direct product of single column ranges specified by `parallelogram`.
bool mayBeTrueInParallelogram(const std::vector<Range> & parallelogram, const DataTypes & data_types) const;
/// Argument initial_mask is used for early exiting the implementation when we do not care about
/// one of the resulting mask components (see BoolMask::consider_only_can_be_XXX).
BoolMask checkInRange(
size_t used_key_size,
const Field * left_key,
const Field * right_key,
const DataTypes & data_types,
BoolMask initial_mask = BoolMask(false, false)) const;

/// Is the condition valid in a semi-infinite (not limited to the right) key range.
/// Are the condition and its negation valid in a semi-infinite (not limited to the right) key range.
/// left_key must contain all the fields in the sort_descr in the appropriate order.
bool mayBeTrueAfter(size_t used_key_size, const Field * left_key, const DataTypes & data_types) const;
BoolMask checkAfter(
size_t used_key_size,
const Field * left_key,
const DataTypes & data_types,
BoolMask initial_mask = BoolMask(false, false)) const;

/// Same as checkInRange, but calculates only the can_be_true component of the result.
/// This is more efficient than checkInRange(...).can_be_true.
bool mayBeTrueInRange(
size_t used_key_size,
const Field * left_key,
const Field * right_key,
const DataTypes & data_types) const;

/// Same as checkAfter, but calculates only the can_be_true component of the result.
/// This is more efficient than checkAfter(...).can_be_true.
bool mayBeTrueAfter(
size_t used_key_size,
const Field * left_key,
const DataTypes & data_types) const;

/// Checks that the index can not be used.
bool alwaysUnknownOrTrue() const;
Expand Down Expand Up @@ -330,12 +358,13 @@ class KeyCondition
static const AtomMap atom_map;

private:
bool mayBeTrueInRange(
BoolMask checkInRange(
size_t used_key_size,
const Field * left_key,
const Field * right_key,
const DataTypes & data_types,
bool right_bounded) const;
bool right_bounded,
BoolMask initial_mask) const;

void traverseAST(const ASTPtr & node, const Context & context, Block & block_with_constants);
bool atomFromAST(const ASTPtr & node, const Context & context, Block & block_with_constants, RPNElement & out);
Expand Down
4 changes: 2 additions & 2 deletions dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -276,8 +276,8 @@ Pipes MergeTreeDataSelectExecutor::readFromParts(
if (part->isEmpty())
continue;

if (minmax_idx_condition && !minmax_idx_condition->mayBeTrueInParallelogram(
part->minmax_idx.parallelogram, data.minmax_idx_column_types))
if (minmax_idx_condition && !minmax_idx_condition->checkInParallelogram(
part->minmax_idx.parallelogram, data.minmax_idx_column_types).can_be_true)
continue;

if (max_block_numbers_to_read)
Expand Down
4 changes: 2 additions & 2 deletions dbms/src/Storages/MergeTree/MergeTreeIndexFullText.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -378,11 +378,11 @@ bool MergeTreeConditionFullText::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx
rpn_stack.emplace_back(true, false);
}
else
throw Exception("Unexpected function type in KeyCondition::RPNElement", ErrorCodes::LOGICAL_ERROR);
throw Exception("Unexpected function type in BloomFilterCondition::RPNElement", ErrorCodes::LOGICAL_ERROR);
}

if (rpn_stack.size() != 1)
throw Exception("Unexpected stack size in KeyCondition::mayBeTrueInRange", ErrorCodes::LOGICAL_ERROR);
throw Exception("Unexpected stack size in BloomFilterCondition::mayBeTrueOnGranule", ErrorCodes::LOGICAL_ERROR);

return rpn_stack[0].can_be_true;
}
Expand Down
2 changes: 1 addition & 1 deletion dbms/src/Storages/MergeTree/MergeTreeIndexMinMax.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ bool MergeTreeIndexConditionMinMax::mayBeTrueOnGranule(MergeTreeIndexGranulePtr
for (const auto & range : granule->parallelogram)
if (range.left.isNull() || range.right.isNull())
return true;
return condition.mayBeTrueInParallelogram(granule->parallelogram, index.data_types);
return condition.checkInParallelogram(granule->parallelogram, index.data_types).can_be_true;
}


Expand Down

0 comments on commit 2d3321e

Please sign in to comment.