Skip to content

Commit

Permalink
Simplify bounds/range vs selectors/equality logic in SQL planning. (a…
Browse files Browse the repository at this point in the history
…pache#14619)

* Simplify bounds/range vs selectors/equality logic in SQL planning.

1) Consolidate duplicate code related to Expressions#buildTimeFloorFilter.

2) Cleaner logic in Expressions#toSimpleLeafFilter: choose bounds vs range
   filter based solely on plannerContext.isUseBoundsAndSelectors, not also
   considering rhs kind. Use parsed rhs in both paths (except for numerics
   in the bound path).

3) Fix ArrayContains, ArrayOverlap to avoid equality filters when there is
   an extractionFn present. Fixes a bug introduced in apache#14612.

* Avoid sending nonprimitives down the bound path.
  • Loading branch information
gianm authored Jul 20, 2023
1 parent 01e9a39 commit c2e6758
Show file tree
Hide file tree
Showing 7 changed files with 159 additions and 108 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import org.apache.calcite.rex.RexInputRef;
import org.apache.calcite.rex.RexLiteral;
import org.apache.calcite.rex.RexNode;
import org.apache.calcite.rex.RexUtil;
import org.apache.calcite.sql.SqlKind;
import org.apache.calcite.sql.SqlOperator;
import org.apache.calcite.sql.type.SqlTypeFamily;
Expand Down Expand Up @@ -519,33 +520,29 @@ private static DimFilter toSimpleLeafFilter(
// column instead for filtering to ensure that results are correct
if (druidExpression.isSimpleExtraction() &&
!(isOutputNumeric && !rowSignature.isNumeric(druidExpression.getDirectColumn()))) {
if (!plannerContext.isUseBoundsAndSelectors()) {
if (druidExpression.getSimpleExtraction().getExtractionFn() != null) {
// return null to fallback to using an expression filter
return null;
}
equalFilter = NullFilter.forColumn(druidExpression.getDirectColumn());
} else {
if (plannerContext.isUseBoundsAndSelectors()) {
equalFilter = new SelectorDimFilter(
druidExpression.getSimpleExtraction().getColumn(),
NullHandling.defaultStringValue(),
druidExpression.getSimpleExtraction().getExtractionFn()
);
} else {
if (druidExpression.getSimpleExtraction().getExtractionFn() != null) {
// return null to fallback to using an expression filter
return null;
}
equalFilter = NullFilter.forColumn(druidExpression.getDirectColumn());
}
} else if (virtualColumnRegistry != null) {
final String virtualColumn = virtualColumnRegistry.getOrCreateVirtualColumnForExpression(
druidExpression,
operand.getType()
);

if (!plannerContext.isUseBoundsAndSelectors()) {
equalFilter = NullFilter.forColumn(virtualColumn);
if (plannerContext.isUseBoundsAndSelectors()) {
equalFilter = new SelectorDimFilter(virtualColumn, NullHandling.defaultStringValue(), null);
} else {
equalFilter = new SelectorDimFilter(
virtualColumn,
NullHandling.defaultStringValue(),
null
);
equalFilter = NullFilter.forColumn(virtualColumn);
}
} else {
return null;
Expand Down Expand Up @@ -601,11 +598,11 @@ private static DimFilter toSimpleLeafFilter(
}

final DruidExpression rhsExpression = toDruidExpression(plannerContext, rowSignature, rhs);
final Expr parsedRhsExpression = rhsExpression != null
final Expr rhsParsed = rhsExpression != null
? plannerContext.parseExpression(rhsExpression.getExpression())
: null;
// rhs must be a literal
if (rhs.getKind() != SqlKind.LITERAL && (parsedRhsExpression == null || !parsedRhsExpression.isLiteral())) {
if (rhsParsed == null || !rhsParsed.isLiteral()) {
return null;
}

Expand Down Expand Up @@ -651,43 +648,37 @@ private static DimFilter toSimpleLeafFilter(

final Granularity granularity = ExtractionFns.toQueryGranularity(extractionFn);
if (granularity != null) {
// lhs is FLOOR(__time TO granularity); rhs must be a timestamp
// lhs is FLOOR(__time TO granularity); rhs must be a timestamp.
final long rhsMillis = Calcites.calciteDateTimeLiteralToJoda(rhs, plannerContext.getTimeZone()).getMillis();
final Interval rhsInterval = granularity.bucket(DateTimes.utc(rhsMillis));

// Is rhs aligned on granularity boundaries?
final boolean rhsAligned = rhsInterval.getStartMillis() == rhsMillis;
return buildTimeFloorFilter(column, granularity, flippedKind, rhsMillis, plannerContext);
}
}

if (plannerContext.isUseBoundsAndSelectors()) {
// Create a BoundRefKey that strips the extractionFn and compares __time as a number.
final BoundRefKey boundRefKey = new BoundRefKey(column, null, StringComparators.NUMERIC);
final ColumnType matchValueType = Calcites.getColumnTypeForRelDataType(rhs.getType());

return getBoundTimeDimFilter(flippedKind, boundRefKey, rhsInterval, rhsAligned);
} else {
final RangeRefKey rangeRefKey = new RangeRefKey(column, ColumnType.LONG);
return getRangeTimeDimFilter(flippedKind, rangeRefKey, rhsInterval, rhsAligned);
}
if (plannerContext.isUseBoundsAndSelectors()) {
if (matchValueType == null || !matchValueType.isPrimitive()) {
// Fall back to expression filter.
return null;
}
}

if (plannerContext.isUseBoundsAndSelectors() && rhs instanceof RexLiteral) {
final String val;
final RexLiteral rhsLiteral = (RexLiteral) rhs;
if (SqlTypeName.NUMERIC_TYPES.contains(rhsLiteral.getTypeName())) {
val = String.valueOf(RexLiteral.value(rhsLiteral));
} else if (SqlTypeName.CHAR_TYPES.contains(rhsLiteral.getTypeName())) {
val = String.valueOf(RexLiteral.stringValue(rhsLiteral));
} else if (SqlTypeName.TIMESTAMP == rhsLiteral.getTypeName() || SqlTypeName.DATE == rhsLiteral.getTypeName()) {
val = String.valueOf(
Calcites.calciteDateTimeLiteralToJoda(
rhsLiteral,
plannerContext.getTimeZone()
).getMillis()
);
final String stringVal;

if (rhsParsed.getLiteralValue() == null) {
stringVal = NullHandling.defaultStringValue();
} else if (RexUtil.isLiteral(rhs, true) && SqlTypeName.NUMERIC_TYPES.contains(rhs.getType().getSqlTypeName())) {
// Peek inside the original rhs for numerics, rather than using the parsed version, for highest fidelity
// to what the query originally contained. (It may be a BigDecimal.)
stringVal = String.valueOf(RexLiteral.value(rhs));
} else {
// Don't know how to filter on this kind of literal.
stringVal = String.valueOf(rhsParsed.getLiteralValue());
}

if (stringVal == null) {
// Fall back to expression filter.
return null;
}

// Numeric lhs needs a numeric comparison.
final StringComparator comparator = Calcites.getStringComparatorForRelDataType(lhs.getType());
final BoundRefKey boundRefKey = new BoundRefKey(column, extractionFn, comparator);
Expand All @@ -696,43 +687,36 @@ private static DimFilter toSimpleLeafFilter(
// Always use BoundDimFilters, to simplify filter optimization later (it helps to remember the comparator).
switch (flippedKind) {
case EQUALS:
filter = Bounds.equalTo(boundRefKey, val);
filter = Bounds.equalTo(boundRefKey, stringVal);
break;
case NOT_EQUALS:
filter = new NotDimFilter(Bounds.equalTo(boundRefKey, val));
filter = new NotDimFilter(Bounds.equalTo(boundRefKey, stringVal));
break;
case GREATER_THAN:
filter = Bounds.greaterThan(boundRefKey, val);
filter = Bounds.greaterThan(boundRefKey, stringVal);
break;
case GREATER_THAN_OR_EQUAL:
filter = Bounds.greaterThanOrEqualTo(boundRefKey, val);
filter = Bounds.greaterThanOrEqualTo(boundRefKey, stringVal);
break;
case LESS_THAN:
filter = Bounds.lessThan(boundRefKey, val);
filter = Bounds.lessThan(boundRefKey, stringVal);
break;
case LESS_THAN_OR_EQUAL:
filter = Bounds.lessThanOrEqualTo(boundRefKey, val);
filter = Bounds.lessThanOrEqualTo(boundRefKey, stringVal);
break;
default:
throw new IllegalStateException("Shouldn't have got here");
}

return filter;
} else {
//noinspection VariableNotUsedInsideIf
if (extractionFn != null) {
final Object val = rhsParsed.getLiteralValue();

if (extractionFn != null || val == null) {
// fall back to expression filter
return null;
}
final Object val;
if (parsedRhsExpression != null && parsedRhsExpression.isLiteral()) {
val = parsedRhsExpression.getLiteralValue();
} else {
// Don't know how to filter on this kind of literal.
return null;
}

final ColumnType matchValueType = Calcites.getColumnTypeForRelDataType(rhs.getType());
final RangeRefKey rangeRefKey = new RangeRefKey(column, matchValueType);
final DimFilter filter;

Expand Down Expand Up @@ -850,26 +834,20 @@ private static DimFilter buildTimeFloorFilter(
final PlannerContext plannerContext
)
{
if (plannerContext.isUseBoundsAndSelectors()) {
final BoundRefKey boundRefKey = new BoundRefKey(column, null, StringComparators.NUMERIC);
final Interval rhsInterval = granularity.bucket(DateTimes.utc(rhsMillis));
final Interval rhsInterval = granularity.bucket(DateTimes.utc(rhsMillis));

// Is rhs aligned on granularity boundaries?
final boolean rhsAligned = rhsInterval.getStartMillis() == rhsMillis;
// Is rhs aligned on granularity boundaries?
final boolean rhsAligned = rhsInterval.getStartMillis() == rhsMillis;

if (plannerContext.isUseBoundsAndSelectors()) {
final BoundRefKey boundRefKey = new BoundRefKey(column, null, StringComparators.NUMERIC);
return getBoundTimeDimFilter(operatorKind, boundRefKey, rhsInterval, rhsAligned);
} else {
final RangeRefKey rangeRefKey = new RangeRefKey(column, ColumnType.LONG);
final Interval rhsInterval = granularity.bucket(DateTimes.utc(rhsMillis));

// Is rhs aligned on granularity boundaries?
final boolean rhsAligned = rhsInterval.getStartMillis() == rhsMillis;

return getRangeTimeDimFilter(operatorKind, rangeRefKey, rhsInterval, rhsAligned);
}
}


private static DimFilter getBoundTimeDimFilter(
SqlKind operatorKind,
BoundRefKey boundRefKey,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,8 @@
import org.apache.druid.sql.calcite.rel.VirtualColumnRegistry;

import javax.annotation.Nullable;
import java.util.Arrays;
import java.util.ArrayList;
import java.util.List;
import java.util.stream.Collectors;

public class ArrayContainsOperatorConversion extends BaseExpressionDimFilterOperatorConversion
{
Expand Down Expand Up @@ -98,7 +97,9 @@ public DimFilter toDruidFilter(

// if the input column is not actually an ARRAY type, but rather an MVD, we can optimize this into
// selector/equality filters on the individual array elements
if (leftExpr.isSimpleExtraction() && !(leftExpr.isDirectColumnAccess() && leftExpr.getDruidType() != null && leftExpr.getDruidType().isArray())) {
if (leftExpr.isSimpleExtraction()
&& !isArray(leftExpr)
&& (plannerContext.isUseBoundsAndSelectors() || leftExpr.isDirectColumnAccess())) {
Expr expr = plannerContext.parseExpression(rightExpr.getExpression());
// To convert this expression filter into an And of Selector filters, we need to extract all array elements.
// For now, we can optimize only when rightExpr is a literal because there is no way to extract the array elements
Expand All @@ -115,37 +116,34 @@ public DimFilter toDruidFilter(
// However, since both Calcite's SqlMultisetValueConstructor and Druid's ArrayConstructorFunction don't allow
// to create an empty array with no argument, we just return null.
return null;
} else if (arrayElements.length == 1) {
if (plannerContext.isUseBoundsAndSelectors()) {
return newSelectorDimFilter(leftExpr.getSimpleExtraction(), Evals.asString(arrayElements[0]));
} else {
return new EqualityFilter(
leftExpr.getSimpleExtraction().getColumn(),
ExpressionType.toColumnType(ExpressionType.elementType(exprEval.type())),
arrayElements[0],
null
);
}
} else {
final List<DimFilter> selectFilters = Arrays
.stream(arrayElements)
.map(val -> {
if (plannerContext.isUseBoundsAndSelectors()) {
return newSelectorDimFilter(leftExpr.getSimpleExtraction(), Evals.asString(val));
} else {
return new EqualityFilter(
final List<DimFilter> filters = new ArrayList<>(arrayElements.length);

for (final Object val : arrayElements) {
if (plannerContext.isUseBoundsAndSelectors()) {
filters.add(newSelectorDimFilter(leftExpr.getSimpleExtraction(), Evals.asString(val)));
} else {
// Cannot handle extractionFn here. We won't get one due to the isDirectColumnAccess check above.
filters.add(
new EqualityFilter(
leftExpr.getSimpleExtraction().getColumn(),
ExpressionType.toColumnType(ExpressionType.elementType(exprEval.type())),
val,
null
);
}
})
.collect(Collectors.toList());
return new AndDimFilter(selectFilters);
)
);
}
}

return filters.size() == 1 ? filters.get(0) : new AndDimFilter(filters);
}
}
}
return toExpressionFilter(plannerContext, getDruidFunctionName(), druidExpressions);
}

private static boolean isArray(final DruidExpression expr)
{
return expr.getDruidType() != null && expr.getDruidType().isArray();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ public DimFilter toDruidFilter(
}

Expr expr = plannerContext.parseExpression(complexExpr.getExpression());
if (expr.isLiteral()) {
if (expr.isLiteral() && (plannerContext.isUseBoundsAndSelectors() || simpleExtractionExpr.isDirectColumnAccess())) {
// Evaluate the expression to take out the array elements.
// We can safely pass null if the expression is literal.
ExprEval<?> exprEval = expr.eval(InputBindings.nilBindings());
Expand All @@ -129,6 +129,7 @@ public DimFilter toDruidFilter(
if (plannerContext.isUseBoundsAndSelectors()) {
return newSelectorDimFilter(simpleExtractionExpr.getSimpleExtraction(), Evals.asString(arrayElements[0]));
} else {
// Cannot handle extractionFn here. We won't get one due to the isDirectColumnAccess check above.
return new EqualityFilter(
simpleExtractionExpr.getSimpleExtraction().getColumn(),
ExpressionType.toColumnType(exprEval.type()),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -119,11 +119,7 @@ public DimFilter toDruidFilter(
DimFilter thenFilter = null, elseFilter = null;
final DimFilter isNull;
if (plannerContext.isUseBoundsAndSelectors()) {
isNull = new SelectorDimFilter(
condition.getArguments().get(0).getDirectColumn(),
null,
null
);
isNull = new SelectorDimFilter(condition.getArguments().get(0).getDirectColumn(), null, null);
} else {
isNull = NullFilter.forColumn(condition.getArguments().get(0).getDirectColumn());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.java.util.emitter.EmittingLogger;
import org.apache.druid.math.expr.ExpressionProcessing;
import org.apache.druid.math.expr.ExpressionProcessingConfig;
import org.apache.druid.query.ordering.StringComparator;
import org.apache.druid.query.ordering.StringComparators;
import org.apache.druid.segment.column.ColumnType;
Expand Down Expand Up @@ -132,9 +133,15 @@ public static String escapeStringLiteral(final String s)
}

/**
* Convert {@link RelDataType} to the most appropriate {@link ValueType}
* Caller who want to coerce all ARRAY types to STRING can set `druid.expressions.allowArrayToStringCast`
* runtime property in {@link org.apache.druid.math.expr.ExpressionProcessingConfig}
* Convert {@link RelDataType} to the most appropriate {@link ColumnType}, or null if there is no {@link ColumnType}
* that is appropriate for this {@link RelDataType}.
*
* Equivalent to {@link #getValueTypeForRelDataTypeFull(RelDataType)}, except this method returns
* {@link ColumnType#STRING} when {@link ColumnType#isArray()} if
* {@link ExpressionProcessingConfig#processArraysAsMultiValueStrings()} is set via the server property
* {@code druid.expressions.allowArrayToStringCast}.
*
* @return type, or null if there is no matching type
*/
@Nullable
public static ColumnType getColumnTypeForRelDataType(final RelDataType type)
Expand All @@ -148,7 +155,13 @@ public static ColumnType getColumnTypeForRelDataType(final RelDataType type)
}

/**
* Convert {@link RelDataType} to the most appropriate {@link ValueType}
* Convert {@link RelDataType} to the most appropriate {@link ColumnType}, or null if there is no {@link ColumnType}
* that is appropriate for this {@link RelDataType}.
*
* Equivalent to {@link #getColumnTypeForRelDataType(RelDataType)}, but ignores
* {@link ExpressionProcessingConfig#processArraysAsMultiValueStrings()} (and acts as if it is false).
*
* @return type, or null if there is no matching type
*/
@Nullable
public static ColumnType getValueTypeForRelDataTypeFull(final RelDataType type)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -316,6 +316,14 @@ public boolean isStringifyArrays()
return stringifyArrays;
}

/**
* Whether we should use {@link org.apache.druid.query.filter.BoundDimFilter} and
* {@link org.apache.druid.query.filter.SelectorDimFilter} (true) or {@link org.apache.druid.query.filter.RangeFilter},
* {@link org.apache.druid.query.filter.EqualityFilter}, and {@link org.apache.druid.query.filter.NullFilter} (false).
*
* Typically true when {@link NullHandling#replaceWithDefault()} and false when {@link NullHandling#sqlCompatible()}.
* Can be overriden by the undocumented context parameter {@link #CTX_SQL_USE_BOUNDS_AND_SELECTORS}.
*/
public boolean isUseBoundsAndSelectors()
{
return useBoundsAndSelectors;
Expand Down
Loading

0 comments on commit c2e6758

Please sign in to comment.