Skip to content

Commit

Permalink
Improve cost calculation for MULTI_DISTINCT_COUNT function (StarRocks…
Browse files Browse the repository at this point in the history
  • Loading branch information
Youngwb authored Sep 24, 2021
1 parent 44c4118 commit 97c46c5
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -188,8 +188,14 @@ public CostEstimate computeAggFunExtraCost(PhysicalHashAggregateOperator node, S
if (distinctColumnStats.isUnknown()) {
hashSetSize = rowSize * inputStatistics.getOutputRowCount() / statistics.getOutputRowCount();
} else {
// We need to estimate the number of distinct values in each bucket because we do not know the
// correlation between the GROUP BY column and the DISTINCT column.
// Each bucket is estimated to hold (DistinctValuesCount / buckets * 2) distinct values,
// except when the bucket count equals 1, in which case it holds all DistinctValuesCount values.
double distinctValuesPerBucket = buckets == 1 ? distinctColumnStats.getDistinctValuesCount() :
distinctColumnStats.getDistinctValuesCount() / buckets * 2;
// 40 bytes is the state cost of hashset
hashSetSize = rowSize * distinctColumnStats.getDistinctValuesCount() / buckets * 2 + 40;
hashSetSize = rowSize * distinctValuesPerBucket + 40;
}
costEstimate = CostEstimate.addCost(costEstimate, CostEstimate.ofMemory(buckets * hashSetSize));
}
Expand Down Expand Up @@ -239,7 +245,8 @@ public CostEstimate visitPhysicalDistribution(PhysicalDistributionOperator node,
result = CostEstimate.of(statistics.getOutputSize(), 0, statistics.getOutputSize());
break;
default:
throw new StarRocksPlannerException("not support " + distributionSpec.getType() + "distribution type",
throw new StarRocksPlannerException(
"not support " + distributionSpec.getType() + "distribution type",
ErrorType.UNSUPPORTED);
}
return result;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,16 @@ public void testCountDistinctWithGroupHighCountHigh() throws Exception {
+ " | output: count(2: P_NAME)"));
}

/**
 * Plans a COUNT(DISTINCT ...) query that has no GROUP BY clause and checks that the
 * fragment plan text contains both the merge-finalize and the update-serialize
 * aggregate nodes using multi_distinct_count.
 */
@Test
public void testCountDistinctWithoutGroupBy() throws Exception {
    final String plan = getFragmentPlan("SELECT COUNT (DISTINCT l_partkey) FROM lineitem");

    // Expected text of the final (merge) aggregation node.
    final String expectedMergeNode = "3:AGGREGATE (merge finalize)\n"
            + " | output: multi_distinct_count(18: count(distinct 2: L_PARTKEY))";
    // Expected text of the first-stage (update) aggregation node.
    final String expectedUpdateNode = "1:AGGREGATE (update serialize)\n"
            + " | output: multi_distinct_count(2: L_PARTKEY)";

    Assert.assertTrue(plan.contains(expectedMergeNode));
    Assert.assertTrue(plan.contains(expectedUpdateNode));
}

@Test
public void testJoinDateAndDateTime() throws Exception {
String sql = "select count(a.id_date) from test_all_type a " +
Expand Down

0 comments on commit 97c46c5

Please sign in to comment.