Simplify bounds/range vs selectors/equality logic in SQL planning. (a…

…pache#14619) * Simplify bounds/range vs selectors/equality logic in SQL planning. 1) Consolidate duplicate code related to Expressions#buildTimeFloorFilter. 2) Cleaner logic in Expressions#toSimpleLeafFilter: choose bounds vs range filter based solely on plannerContext.isUseBoundsAndSelectors, not also considering rhs kind. Use parsed rhs in both paths (except for numerics in the bound path). 3) Fix ArrayContains, ArrayOverlap to avoid equality filters when there is an extractionFn present. Fixes a bug introduced in apache#14612. * Avoid sending nonprimitives down the bound path.
xvrl · Jul 20, 2023 · c2e6758 · c2e6758
1 parent 01e9a39
commit c2e6758
Show file tree

Hide file tree

Showing 7 changed files with 159 additions and 108 deletions.
diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/expression/Expressions.java b/sql/src/main/java/org/apache/druid/sql/calcite/expression/Expressions.java
@@ -28,6 +28,7 @@
 import org.apache.calcite.rex.RexInputRef;
 import org.apache.calcite.rex.RexLiteral;
 import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.rex.RexUtil;
 import org.apache.calcite.sql.SqlKind;
 import org.apache.calcite.sql.SqlOperator;
 import org.apache.calcite.sql.type.SqlTypeFamily;
@@ -519,33 +520,29 @@ private static DimFilter toSimpleLeafFilter(
       // column instead for filtering to ensure that results are correct
       if (druidExpression.isSimpleExtraction() &&
           !(isOutputNumeric && !rowSignature.isNumeric(druidExpression.getDirectColumn()))) {
-        if (!plannerContext.isUseBoundsAndSelectors()) {
-          if (druidExpression.getSimpleExtraction().getExtractionFn() != null) {
-            // return null to fallback to using an expression filter
-            return null;
-          }
-          equalFilter = NullFilter.forColumn(druidExpression.getDirectColumn());
-        } else {
+        if (plannerContext.isUseBoundsAndSelectors()) {
           equalFilter = new SelectorDimFilter(
               druidExpression.getSimpleExtraction().getColumn(),
               NullHandling.defaultStringValue(),
               druidExpression.getSimpleExtraction().getExtractionFn()
           );
+        } else {
+          if (druidExpression.getSimpleExtraction().getExtractionFn() != null) {
+            // return null to fallback to using an expression filter
+            return null;
+          }
+          equalFilter = NullFilter.forColumn(druidExpression.getDirectColumn());
         }
       } else if (virtualColumnRegistry != null) {
         final String virtualColumn = virtualColumnRegistry.getOrCreateVirtualColumnForExpression(
             druidExpression,
             operand.getType()
         );
 
-        if (!plannerContext.isUseBoundsAndSelectors()) {
-          equalFilter = NullFilter.forColumn(virtualColumn);
+        if (plannerContext.isUseBoundsAndSelectors()) {
+          equalFilter = new SelectorDimFilter(virtualColumn, NullHandling.defaultStringValue(), null);
         } else {
-          equalFilter = new SelectorDimFilter(
-              virtualColumn,
-              NullHandling.defaultStringValue(),
-              null
-          );
+          equalFilter = NullFilter.forColumn(virtualColumn);
         }
       } else {
         return null;
@@ -601,11 +598,11 @@ private static DimFilter toSimpleLeafFilter(
       }
 
       final DruidExpression rhsExpression = toDruidExpression(plannerContext, rowSignature, rhs);
-      final Expr parsedRhsExpression = rhsExpression != null
+      final Expr rhsParsed = rhsExpression != null
                                        ? plannerContext.parseExpression(rhsExpression.getExpression())
                                        : null;
       // rhs must be a literal
-      if (rhs.getKind() != SqlKind.LITERAL && (parsedRhsExpression == null || !parsedRhsExpression.isLiteral())) {
+      if (rhsParsed == null || !rhsParsed.isLiteral()) {
         return null;
       }
 
@@ -651,43 +648,37 @@ private static DimFilter toSimpleLeafFilter(
 
         final Granularity granularity = ExtractionFns.toQueryGranularity(extractionFn);
         if (granularity != null) {
-          // lhs is FLOOR(__time TO granularity); rhs must be a timestamp
+          // lhs is FLOOR(__time TO granularity); rhs must be a timestamp.
           final long rhsMillis = Calcites.calciteDateTimeLiteralToJoda(rhs, plannerContext.getTimeZone()).getMillis();
-          final Interval rhsInterval = granularity.bucket(DateTimes.utc(rhsMillis));
-
-          // Is rhs aligned on granularity boundaries?
-          final boolean rhsAligned = rhsInterval.getStartMillis() == rhsMillis;
+          return buildTimeFloorFilter(column, granularity, flippedKind, rhsMillis, plannerContext);
+        }
+      }
 
-          if (plannerContext.isUseBoundsAndSelectors()) {
-            // Create a BoundRefKey that strips the extractionFn and compares __time as a number.
-            final BoundRefKey boundRefKey = new BoundRefKey(column, null, StringComparators.NUMERIC);
+      final ColumnType matchValueType = Calcites.getColumnTypeForRelDataType(rhs.getType());
 
-            return getBoundTimeDimFilter(flippedKind, boundRefKey, rhsInterval, rhsAligned);
-          } else {
-            final RangeRefKey rangeRefKey = new RangeRefKey(column, ColumnType.LONG);
-            return getRangeTimeDimFilter(flippedKind, rangeRefKey, rhsInterval, rhsAligned);
-          }
+      if (plannerContext.isUseBoundsAndSelectors()) {
+        if (matchValueType == null || !matchValueType.isPrimitive()) {
+          // Fall back to expression filter.
+          return null;
         }
-      }
 
-      if (plannerContext.isUseBoundsAndSelectors() && rhs instanceof RexLiteral) {
-        final String val;
-        final RexLiteral rhsLiteral = (RexLiteral) rhs;
-        if (SqlTypeName.NUMERIC_TYPES.contains(rhsLiteral.getTypeName())) {
-          val = String.valueOf(RexLiteral.value(rhsLiteral));
-        } else if (SqlTypeName.CHAR_TYPES.contains(rhsLiteral.getTypeName())) {
-          val = String.valueOf(RexLiteral.stringValue(rhsLiteral));
-        } else if (SqlTypeName.TIMESTAMP == rhsLiteral.getTypeName() || SqlTypeName.DATE == rhsLiteral.getTypeName()) {
-          val = String.valueOf(
-              Calcites.calciteDateTimeLiteralToJoda(
-                  rhsLiteral,
-                  plannerContext.getTimeZone()
-              ).getMillis()
-          );
+        final String stringVal;
+
+        if (rhsParsed.getLiteralValue() == null) {
+          stringVal = NullHandling.defaultStringValue();
+        } else if (RexUtil.isLiteral(rhs, true) && SqlTypeName.NUMERIC_TYPES.contains(rhs.getType().getSqlTypeName())) {
+          // Peek inside the original rhs for numerics, rather than using the parsed version, for highest fidelity
+          // to what the query originally contained. (It may be a BigDecimal.)
+          stringVal = String.valueOf(RexLiteral.value(rhs));
         } else {
-          // Don't know how to filter on this kind of literal.
+          stringVal = String.valueOf(rhsParsed.getLiteralValue());
+        }
+
+        if (stringVal == null) {
+          // Fall back to expression filter.
           return null;
         }
+
         // Numeric lhs needs a numeric comparison.
         final StringComparator comparator = Calcites.getStringComparatorForRelDataType(lhs.getType());
         final BoundRefKey boundRefKey = new BoundRefKey(column, extractionFn, comparator);
@@ -696,43 +687,36 @@ private static DimFilter toSimpleLeafFilter(
         // Always use BoundDimFilters, to simplify filter optimization later (it helps to remember the comparator).
         switch (flippedKind) {
           case EQUALS:
-            filter = Bounds.equalTo(boundRefKey, val);
+            filter = Bounds.equalTo(boundRefKey, stringVal);
             break;
           case NOT_EQUALS:
-            filter = new NotDimFilter(Bounds.equalTo(boundRefKey, val));
+            filter = new NotDimFilter(Bounds.equalTo(boundRefKey, stringVal));
             break;
           case GREATER_THAN:
-            filter = Bounds.greaterThan(boundRefKey, val);
+            filter = Bounds.greaterThan(boundRefKey, stringVal);
             break;
           case GREATER_THAN_OR_EQUAL:
-            filter = Bounds.greaterThanOrEqualTo(boundRefKey, val);
+            filter = Bounds.greaterThanOrEqualTo(boundRefKey, stringVal);
             break;
           case LESS_THAN:
-            filter = Bounds.lessThan(boundRefKey, val);
+            filter = Bounds.lessThan(boundRefKey, stringVal);
             break;
           case LESS_THAN_OR_EQUAL:
-            filter = Bounds.lessThanOrEqualTo(boundRefKey, val);
+            filter = Bounds.lessThanOrEqualTo(boundRefKey, stringVal);
             break;
           default:
             throw new IllegalStateException("Shouldn't have got here");
         }
 
         return filter;
       } else {
-        //noinspection VariableNotUsedInsideIf
-        if (extractionFn != null) {
+        final Object val = rhsParsed.getLiteralValue();
+
+        if (extractionFn != null || val == null) {
           // fall back to expression filter
           return null;
         }
-        final Object val;
-        if (parsedRhsExpression != null && parsedRhsExpression.isLiteral()) {
-          val = parsedRhsExpression.getLiteralValue();
-        } else {
-          // Don't know how to filter on this kind of literal.
-          return null;
-        }
 
-        final ColumnType matchValueType = Calcites.getColumnTypeForRelDataType(rhs.getType());
         final RangeRefKey rangeRefKey = new RangeRefKey(column, matchValueType);
         final DimFilter filter;
 
@@ -850,26 +834,20 @@ private static DimFilter buildTimeFloorFilter(
       final PlannerContext plannerContext
   )
   {
-    if (plannerContext.isUseBoundsAndSelectors()) {
-      final BoundRefKey boundRefKey = new BoundRefKey(column, null, StringComparators.NUMERIC);
-      final Interval rhsInterval = granularity.bucket(DateTimes.utc(rhsMillis));
+    final Interval rhsInterval = granularity.bucket(DateTimes.utc(rhsMillis));
 
-      // Is rhs aligned on granularity boundaries?
-      final boolean rhsAligned = rhsInterval.getStartMillis() == rhsMillis;
+    // Is rhs aligned on granularity boundaries?
+    final boolean rhsAligned = rhsInterval.getStartMillis() == rhsMillis;
 
+    if (plannerContext.isUseBoundsAndSelectors()) {
+      final BoundRefKey boundRefKey = new BoundRefKey(column, null, StringComparators.NUMERIC);
       return getBoundTimeDimFilter(operatorKind, boundRefKey, rhsInterval, rhsAligned);
     } else {
       final RangeRefKey rangeRefKey = new RangeRefKey(column, ColumnType.LONG);
-      final Interval rhsInterval = granularity.bucket(DateTimes.utc(rhsMillis));
-
-      // Is rhs aligned on granularity boundaries?
-      final boolean rhsAligned = rhsInterval.getStartMillis() == rhsMillis;
-
       return getRangeTimeDimFilter(operatorKind, rangeRefKey, rhsInterval, rhsAligned);
     }
   }
 
-
   private static DimFilter getBoundTimeDimFilter(
       SqlKind operatorKind,
       BoundRefKey boundRefKey,

diff --git a/...java/org/apache/druid/sql/calcite/expression/builtin/ArrayContainsOperatorConversion.java b/...java/org/apache/druid/sql/calcite/expression/builtin/ArrayContainsOperatorConversion.java
@@ -41,9 +41,8 @@
 import org.apache.druid.sql.calcite.rel.VirtualColumnRegistry;
 
 import javax.annotation.Nullable;
-import java.util.Arrays;
+import java.util.ArrayList;
 import java.util.List;
-import java.util.stream.Collectors;
 
 public class ArrayContainsOperatorConversion extends BaseExpressionDimFilterOperatorConversion
 {
@@ -98,7 +97,9 @@ public DimFilter toDruidFilter(
 
     // if the input column is not actually an ARRAY type, but rather an MVD, we can optimize this into
     // selector/equality filters on the individual array elements
-    if (leftExpr.isSimpleExtraction() && !(leftExpr.isDirectColumnAccess() && leftExpr.getDruidType() != null && leftExpr.getDruidType().isArray())) {
+    if (leftExpr.isSimpleExtraction()
+        && !isArray(leftExpr)
+        && (plannerContext.isUseBoundsAndSelectors() || leftExpr.isDirectColumnAccess())) {
       Expr expr = plannerContext.parseExpression(rightExpr.getExpression());
       // To convert this expression filter into an And of Selector filters, we need to extract all array elements.
       // For now, we can optimize only when rightExpr is a literal because there is no way to extract the array elements
@@ -115,37 +116,34 @@ public DimFilter toDruidFilter(
           // However, since both Calcite's SqlMultisetValueConstructor and Druid's ArrayConstructorFunction don't allow
           // to create an empty array with no argument, we just return null.
           return null;
-        } else if (arrayElements.length == 1) {
-          if (plannerContext.isUseBoundsAndSelectors()) {
-            return newSelectorDimFilter(leftExpr.getSimpleExtraction(), Evals.asString(arrayElements[0]));
-          } else {
-            return new EqualityFilter(
-                leftExpr.getSimpleExtraction().getColumn(),
-                ExpressionType.toColumnType(ExpressionType.elementType(exprEval.type())),
-                arrayElements[0],
-                null
-            );
-          }
         } else {
-          final List<DimFilter> selectFilters = Arrays
-              .stream(arrayElements)
-              .map(val -> {
-                if (plannerContext.isUseBoundsAndSelectors()) {
-                  return newSelectorDimFilter(leftExpr.getSimpleExtraction(), Evals.asString(val));
-                } else {
-                  return new EqualityFilter(
+          final List<DimFilter> filters = new ArrayList<>(arrayElements.length);
+
+          for (final Object val : arrayElements) {
+            if (plannerContext.isUseBoundsAndSelectors()) {
+              filters.add(newSelectorDimFilter(leftExpr.getSimpleExtraction(), Evals.asString(val)));
+            } else {
+              // Cannot handle extractionFn here. We won't get one due to the isDirectColumnAccess check above.
+              filters.add(
+                  new EqualityFilter(
                       leftExpr.getSimpleExtraction().getColumn(),
                       ExpressionType.toColumnType(ExpressionType.elementType(exprEval.type())),
                       val,
                       null
-                  );
-                }
-              })
-              .collect(Collectors.toList());
-          return new AndDimFilter(selectFilters);
+                  )
+              );
+            }
+          }
+
+          return filters.size() == 1 ? filters.get(0) : new AndDimFilter(filters);
         }
       }
     }
     return toExpressionFilter(plannerContext, getDruidFunctionName(), druidExpressions);
   }
+
+  private static boolean isArray(final DruidExpression expr)
+  {
+    return expr.getDruidType() != null && expr.getDruidType().isArray();
+  }
 }
diff --git a/.../java/org/apache/druid/sql/calcite/expression/builtin/ArrayOverlapOperatorConversion.java b/.../java/org/apache/druid/sql/calcite/expression/builtin/ArrayOverlapOperatorConversion.java
@@ -114,7 +114,7 @@ public DimFilter toDruidFilter(
     }
 
     Expr expr = plannerContext.parseExpression(complexExpr.getExpression());
-    if (expr.isLiteral()) {
+    if (expr.isLiteral() && (plannerContext.isUseBoundsAndSelectors() || simpleExtractionExpr.isDirectColumnAccess())) {
       // Evaluate the expression to take out the array elements.
       // We can safely pass null if the expression is literal.
       ExprEval<?> exprEval = expr.eval(InputBindings.nilBindings());
@@ -129,6 +129,7 @@ public DimFilter toDruidFilter(
         if (plannerContext.isUseBoundsAndSelectors()) {
           return newSelectorDimFilter(simpleExtractionExpr.getSimpleExtraction(), Evals.asString(arrayElements[0]));
         } else {
+          // Cannot handle extractionFn here. We won't get one due to the isDirectColumnAccess check above.
           return new EqualityFilter(
               simpleExtractionExpr.getSimpleExtraction().getColumn(),
               ExpressionType.toColumnType(exprEval.type()),

diff --git a/...src/main/java/org/apache/druid/sql/calcite/expression/builtin/CaseOperatorConversion.java b/...src/main/java/org/apache/druid/sql/calcite/expression/builtin/CaseOperatorConversion.java
@@ -119,11 +119,7 @@ public DimFilter toDruidFilter(
         DimFilter thenFilter = null, elseFilter = null;
         final DimFilter isNull;
         if (plannerContext.isUseBoundsAndSelectors()) {
-          isNull = new SelectorDimFilter(
-              condition.getArguments().get(0).getDirectColumn(),
-              null,
-              null
-          );
+          isNull = new SelectorDimFilter(condition.getArguments().get(0).getDirectColumn(), null, null);
         } else {
           isNull = NullFilter.forColumn(condition.getArguments().get(0).getDirectColumn());
         }

diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/planner/Calcites.java b/sql/src/main/java/org/apache/druid/sql/calcite/planner/Calcites.java
@@ -44,6 +44,7 @@
 import org.apache.druid.java.util.common.StringUtils;
 import org.apache.druid.java.util.emitter.EmittingLogger;
 import org.apache.druid.math.expr.ExpressionProcessing;
+import org.apache.druid.math.expr.ExpressionProcessingConfig;
 import org.apache.druid.query.ordering.StringComparator;
 import org.apache.druid.query.ordering.StringComparators;
 import org.apache.druid.segment.column.ColumnType;
@@ -132,9 +133,15 @@ public static String escapeStringLiteral(final String s)
   }
 
   /**
-   * Convert {@link RelDataType} to the most appropriate {@link ValueType}
-   * Caller who want to coerce all ARRAY types to STRING can set `druid.expressions.allowArrayToStringCast`
-   * runtime property in {@link org.apache.druid.math.expr.ExpressionProcessingConfig}
+   * Convert {@link RelDataType} to the most appropriate {@link ColumnType}, or null if there is no {@link ColumnType}
+   * that is appropriate for this {@link RelDataType}.
+   *
+   * Equivalent to {@link #getValueTypeForRelDataTypeFull(RelDataType)}, except this method returns
+   * {@link ColumnType#STRING} when {@link ColumnType#isArray()} if
+   * {@link ExpressionProcessingConfig#processArraysAsMultiValueStrings()} is set via the server property
+   * {@code druid.expressions.allowArrayToStringCast}.
+   *
+   * @return type, or null if there is no matching type
    */
   @Nullable
   public static ColumnType getColumnTypeForRelDataType(final RelDataType type)
@@ -148,7 +155,13 @@ public static ColumnType getColumnTypeForRelDataType(final RelDataType type)
   }
 
   /**
-   * Convert {@link RelDataType} to the most appropriate {@link ValueType}
+   * Convert {@link RelDataType} to the most appropriate {@link ColumnType}, or null if there is no {@link ColumnType}
+   * that is appropriate for this {@link RelDataType}.
+   *
+   * Equivalent to {@link #getColumnTypeForRelDataType(RelDataType)}, but ignores
+   * {@link ExpressionProcessingConfig#processArraysAsMultiValueStrings()} (and acts as if it is false).
+   *
+   * @return type, or null if there is no matching type
    */
   @Nullable
   public static ColumnType getValueTypeForRelDataTypeFull(final RelDataType type)

diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/planner/PlannerContext.java b/sql/src/main/java/org/apache/druid/sql/calcite/planner/PlannerContext.java
@@ -316,6 +316,14 @@ public boolean isStringifyArrays()
     return stringifyArrays;
   }
 
+  /**
+   * Whether we should use {@link org.apache.druid.query.filter.BoundDimFilter} and
+   * {@link org.apache.druid.query.filter.SelectorDimFilter} (true) or {@link org.apache.druid.query.filter.RangeFilter},
+   * {@link org.apache.druid.query.filter.EqualityFilter}, and {@link org.apache.druid.query.filter.NullFilter} (false).
+   *
+   * Typically true when {@link NullHandling#replaceWithDefault()} and false when {@link NullHandling#sqlCompatible()}.
+   * Can be overriden by the undocumented context parameter {@link #CTX_SQL_USE_BOUNDS_AND_SELECTORS}.
+   */
   public boolean isUseBoundsAndSelectors()
   {
     return useBoundsAndSelectors;