forked from apache/hive
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
HIVE-28675: Maximize the removal of redundant columns from GROUP BY clauses (Stamatis Zampetakis reviewed by Soumyakanti Das, Ramesh Kumar)
Enhance HiveRelFieldTrimmer to remove the maximum number of redundant columns from the GROUP BY clause. The optimization has the following benefits: 1. Generate more efficient plans by pruning as many columns as possible (less CPU/IO/network cost). 2. Avoid missing optimization opportunities by examining all candidates. Close apache#5586
- Loading branch information
Showing
3 changed files
with
137 additions
and
19 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
18 changes: 18 additions & 0 deletions
18
ql/src/test/queries/clientpositive/cbo_groupby_remove_key.q
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
-- Test fixture: a passenger table whose four columns are all NOT NULL and | ||
-- which declares three unique keys: (id), (passport), and the composite | ||
-- (fname, lname). Each UNIQUE constraint is declared DISABLE RELY. | ||
-- NOTE(review): presumably RELY is what allows the optimizer to trust these | ||
-- unenforced keys when pruning GROUP BY columns; confirm against Hive docs. | ||
CREATE TABLE passenger | ||
( | ||
id INT NOT NULL, | ||
fname STRING NOT NULL, | ||
lname STRING NOT NULL, | ||
passport STRING NOT NULL, | ||
UNIQUE (id) DISABLE RELY, | ||
UNIQUE (passport) DISABLE RELY, | ||
UNIQUE (fname, lname) DISABLE RELY | ||
); | ||
|
||
-- Every query below groups by more columns than it selects; EXPLAIN CBO | ||
-- prints the CBO plan so the GROUP BY keys that remain can be inspected. | ||
-- GROUP BY over the two single-column unique keys (id, passport), | ||
-- selecting one of them. | ||
EXPLAIN CBO SELECT id, COUNT(1) FROM passenger GROUP BY id, passport; | ||
EXPLAIN CBO SELECT passport, COUNT(1) FROM passenger GROUP BY id, passport; | ||
-- GROUP BY over all four columns, selecting a single-column unique key. | ||
EXPLAIN CBO SELECT id, COUNT(1) FROM passenger GROUP BY id, fname, lname, passport; | ||
EXPLAIN CBO SELECT passport, COUNT(1) FROM passenger GROUP BY id, fname, lname, passport; | ||
-- GROUP BY over all four columns, selecting only one half of the | ||
-- composite (fname, lname) key. | ||
EXPLAIN CBO SELECT fname, COUNT(1) FROM passenger GROUP BY id, fname, lname, passport; | ||
EXPLAIN CBO SELECT lname, COUNT(1) FROM passenger GROUP BY id, fname, lname, passport; | ||
-- GROUP BY over all four columns, selecting the full composite key. | ||
EXPLAIN CBO SELECT fname, lname, COUNT(1) FROM passenger GROUP BY id, fname, lname, passport; |
112 changes: 112 additions & 0 deletions
112
ql/src/test/results/clientpositive/llap/cbo_groupby_remove_key.q.out
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,112 @@ | ||
PREHOOK: query: CREATE TABLE passenger | ||
( | ||
id INT NOT NULL, | ||
fname STRING NOT NULL, | ||
lname STRING NOT NULL, | ||
passport STRING NOT NULL, | ||
UNIQUE (id) DISABLE RELY, | ||
UNIQUE (passport) DISABLE RELY, | ||
UNIQUE (fname, lname) DISABLE RELY | ||
) | ||
PREHOOK: type: CREATETABLE | ||
PREHOOK: Output: database:default | ||
PREHOOK: Output: default@passenger | ||
POSTHOOK: query: CREATE TABLE passenger | ||
( | ||
id INT NOT NULL, | ||
fname STRING NOT NULL, | ||
lname STRING NOT NULL, | ||
passport STRING NOT NULL, | ||
UNIQUE (id) DISABLE RELY, | ||
UNIQUE (passport) DISABLE RELY, | ||
UNIQUE (fname, lname) DISABLE RELY | ||
) | ||
POSTHOOK: type: CREATETABLE | ||
POSTHOOK: Output: database:default | ||
POSTHOOK: Output: default@passenger | ||
PREHOOK: query: EXPLAIN CBO SELECT id, COUNT(1) FROM passenger GROUP BY id, passport | ||
PREHOOK: type: QUERY | ||
PREHOOK: Input: default@passenger | ||
#### A masked pattern was here #### | ||
POSTHOOK: query: EXPLAIN CBO SELECT id, COUNT(1) FROM passenger GROUP BY id, passport | ||
POSTHOOK: type: QUERY | ||
POSTHOOK: Input: default@passenger | ||
#### A masked pattern was here #### | ||
CBO PLAN: | ||
HiveAggregate(group=[{0}], agg#0=[count()]) | ||
HiveTableScan(table=[[default, passenger]], table:alias=[passenger]) | ||
|
||
PREHOOK: query: EXPLAIN CBO SELECT passport, COUNT(1) FROM passenger GROUP BY id, passport | ||
PREHOOK: type: QUERY | ||
PREHOOK: Input: default@passenger | ||
#### A masked pattern was here #### | ||
POSTHOOK: query: EXPLAIN CBO SELECT passport, COUNT(1) FROM passenger GROUP BY id, passport | ||
POSTHOOK: type: QUERY | ||
POSTHOOK: Input: default@passenger | ||
#### A masked pattern was here #### | ||
CBO PLAN: | ||
HiveAggregate(group=[{3}], agg#0=[count()]) | ||
HiveTableScan(table=[[default, passenger]], table:alias=[passenger]) | ||
|
||
PREHOOK: query: EXPLAIN CBO SELECT id, COUNT(1) FROM passenger GROUP BY id, fname, lname, passport | ||
PREHOOK: type: QUERY | ||
PREHOOK: Input: default@passenger | ||
#### A masked pattern was here #### | ||
POSTHOOK: query: EXPLAIN CBO SELECT id, COUNT(1) FROM passenger GROUP BY id, fname, lname, passport | ||
POSTHOOK: type: QUERY | ||
POSTHOOK: Input: default@passenger | ||
#### A masked pattern was here #### | ||
CBO PLAN: | ||
HiveAggregate(group=[{0}], agg#0=[count()]) | ||
HiveTableScan(table=[[default, passenger]], table:alias=[passenger]) | ||
|
||
PREHOOK: query: EXPLAIN CBO SELECT passport, COUNT(1) FROM passenger GROUP BY id, fname, lname, passport | ||
PREHOOK: type: QUERY | ||
PREHOOK: Input: default@passenger | ||
#### A masked pattern was here #### | ||
POSTHOOK: query: EXPLAIN CBO SELECT passport, COUNT(1) FROM passenger GROUP BY id, fname, lname, passport | ||
POSTHOOK: type: QUERY | ||
POSTHOOK: Input: default@passenger | ||
#### A masked pattern was here #### | ||
CBO PLAN: | ||
HiveAggregate(group=[{3}], agg#0=[count()]) | ||
HiveTableScan(table=[[default, passenger]], table:alias=[passenger]) | ||
|
||
PREHOOK: query: EXPLAIN CBO SELECT fname, COUNT(1) FROM passenger GROUP BY id, fname, lname, passport | ||
PREHOOK: type: QUERY | ||
PREHOOK: Input: default@passenger | ||
#### A masked pattern was here #### | ||
POSTHOOK: query: EXPLAIN CBO SELECT fname, COUNT(1) FROM passenger GROUP BY id, fname, lname, passport | ||
POSTHOOK: type: QUERY | ||
POSTHOOK: Input: default@passenger | ||
#### A masked pattern was here #### | ||
CBO PLAN: | ||
HiveProject(fname=[$1], _o__c1=[$2]) | ||
HiveAggregate(group=[{0, 1}], agg#0=[count()]) | ||
HiveTableScan(table=[[default, passenger]], table:alias=[passenger]) | ||
|
||
PREHOOK: query: EXPLAIN CBO SELECT lname, COUNT(1) FROM passenger GROUP BY id, fname, lname, passport | ||
PREHOOK: type: QUERY | ||
PREHOOK: Input: default@passenger | ||
#### A masked pattern was here #### | ||
POSTHOOK: query: EXPLAIN CBO SELECT lname, COUNT(1) FROM passenger GROUP BY id, fname, lname, passport | ||
POSTHOOK: type: QUERY | ||
POSTHOOK: Input: default@passenger | ||
#### A masked pattern was here #### | ||
CBO PLAN: | ||
HiveProject(lname=[$1], _o__c1=[$2]) | ||
HiveAggregate(group=[{0, 2}], agg#0=[count()]) | ||
HiveTableScan(table=[[default, passenger]], table:alias=[passenger]) | ||
|
||
PREHOOK: query: EXPLAIN CBO SELECT fname, lname, COUNT(1) FROM passenger GROUP BY id, fname, lname, passport | ||
PREHOOK: type: QUERY | ||
PREHOOK: Input: default@passenger | ||
#### A masked pattern was here #### | ||
POSTHOOK: query: EXPLAIN CBO SELECT fname, lname, COUNT(1) FROM passenger GROUP BY id, fname, lname, passport | ||
POSTHOOK: type: QUERY | ||
POSTHOOK: Input: default@passenger | ||
#### A masked pattern was here #### | ||
CBO PLAN: | ||
HiveAggregate(group=[{1, 2}], agg#0=[count()]) | ||
HiveTableScan(table=[[default, passenger]], table:alias=[passenger]) | ||
|