Skip to content

Commit

Permalink
HIVE-24167: Compilation fails due to equivalence mapping violation wh…
Browse files Browse the repository at this point in the history
…en CTE materialization is enabled (apache#5452) (Shohei Okumya, reviewed by Stamatis Zampetakis)
  • Loading branch information
okumin authored Nov 15, 2024
1 parent 06206b5 commit d85c239
Show file tree
Hide file tree
Showing 6 changed files with 1,530 additions and 893 deletions.
33 changes: 0 additions & 33 deletions ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@
import java.util.HashMap;
import java.util.HashSet;
import java.util.IdentityHashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.LinkedList;
Expand Down Expand Up @@ -100,7 +99,6 @@
import org.apache.hadoop.hive.ql.optimizer.SharedWorkOptimizer;
import org.apache.hadoop.hive.ql.optimizer.SortedDynPartitionOptimizer;
import org.apache.hadoop.hive.ql.optimizer.topnkey.TopNKeyProcessor;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter;
import org.apache.hadoop.hive.ql.optimizer.correlation.ReduceSinkDeDuplication;
import org.apache.hadoop.hive.ql.optimizer.topnkey.TopNKeyPushdownProcessor;
import org.apache.hadoop.hive.ql.optimizer.correlation.ReduceSinkJoinDeDuplication;
Expand All @@ -117,7 +115,6 @@
import org.apache.hadoop.hive.ql.optimizer.physical.SerializeFilter;
import org.apache.hadoop.hive.ql.optimizer.physical.StageIDsRearranger;
import org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer;
import org.apache.hadoop.hive.ql.optimizer.signature.OpTreeSignature;
import org.apache.hadoop.hive.ql.optimizer.stats.annotation.AnnotateWithStatistics;
import org.apache.hadoop.hive.ql.optimizer.UnionDistinctMerger;
import org.apache.hadoop.hive.ql.plan.AggregationDesc;
Expand All @@ -140,7 +137,6 @@
import org.apache.hadoop.hive.ql.plan.TezWork;
import org.apache.hadoop.hive.ql.plan.mapper.AuxOpTreeSignature;
import org.apache.hadoop.hive.ql.plan.mapper.PlanMapper;
import org.apache.hadoop.hive.ql.plan.mapper.PlanMapper.EquivGroup;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;
import org.apache.hadoop.hive.ql.stats.OperatorStats;
Expand Down Expand Up @@ -999,35 +995,6 @@ private void removeSemiJoinIfNoStats(OptimizeTezProcContext procCtx)
ogw.startWalking(topNodes, null);
}

private static class CollectAll implements SemanticNodeProcessor {
private PlanMapper planMapper;

@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs)
throws SemanticException {
ParseContext pCtx = ((OptimizeTezProcContext) procCtx).parseContext;
planMapper = pCtx.getContext().getPlanMapper();
FilterOperator fop = (FilterOperator) nd;
OpTreeSignature sig = planMapper.getSignatureOf(fop);
List<EquivGroup> ar = getGroups(planMapper, HiveFilter.class);


return nd;
}

private List<EquivGroup> getGroups(PlanMapper planMapper2, Class<HiveFilter> class1) {
Iterator<EquivGroup> it = planMapper.iterateGroups();
List<EquivGroup> ret = new ArrayList<PlanMapper.EquivGroup>();
while (it.hasNext()) {
EquivGroup g = it.next();
if (g.getAll(class1).size() > 0) {
ret.add(g);
}
}
return ret;
}
}

private static class MarkRuntimeStatsAsIncorrect implements SemanticNodeProcessor {

private PlanMapper planMapper;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,13 +37,17 @@
import org.apache.hadoop.hive.ql.optimizer.signature.OpTreeSignatureFactory;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.Sets;
import org.apache.hadoop.hive.ql.stats.OperatorStats;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
* Enables to connect related objects to eachother.
*
* Most importantly it aids to connect Operators to OperatorStats and probably RelNodes.
*/
public class PlanMapper {
private static final Logger LOG = LoggerFactory.getLogger(PlanMapper.class);

Set<EquivGroup> groups = new HashSet<>();
private Map<Object, EquivGroup> objectMap = new CompositeMap<>(OpTreeSignature.class, AuxOpTreeSignature.class);
Expand Down Expand Up @@ -217,7 +221,9 @@ private void link(Object o1, Object o2, boolean mayMerge) {
}
if (mGroups.size() > 1) {
if (!mayMerge) {
throw new RuntimeException("equivalence mapping violation");
LOG.warn("Illegally linking {} and {}", o1, o2);
mGroups.forEach(g -> g.add(new OperatorStats.IncorrectRuntimeStatsMarker()));
return;
}
EquivGroup newGrp = new EquivGroup();
newGrp.add(o1);
Expand Down Expand Up @@ -256,20 +262,15 @@ public <T> List<T> getAll(Class<T> clazz) {
return ret;
}

public void runMapper(GroupTransformer mapper) {
for (EquivGroup equivGroup : groups) {
mapper.map(equivGroup);
}
}

public <T> List<T> lookupAll(Class<T> clazz, Object key) {
private <T> List<T> lookupAll(Class<T> clazz, Object key) {
EquivGroup group = objectMap.get(key);
if (group == null) {
throw new NoSuchElementException(Objects.toString(key));
}
return group.getAll(clazz);
}

@VisibleForTesting
public <T> T lookup(Class<T> clazz, Object key) {
List<T> all = lookupAll(clazz, key);
if (all.size() != 1) {
Expand All @@ -279,7 +280,6 @@ public <T> T lookup(Class<T> clazz, Object key) {
return all.get(0);
}

@VisibleForTesting
public Iterator<EquivGroup> iterateGroups() {
return groups.iterator();

Expand Down
1 change: 0 additions & 1 deletion ql/src/test/queries/clientpositive/perf/cbo_query14.q
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
set hive.optimize.cte.suggester.class=org.apache.hadoop.hive.ql.optimizer.calcite.CommonTableExpressionPrintSuggester;
--! qt:disabled:HIVE-24167
set hive.mapred.mode=nonstrict;
-- start query 1 in stream 0 using template query14.tpl and seed 1819994127
explain cbo
Expand Down
1 change: 0 additions & 1 deletion ql/src/test/queries/clientpositive/perf/query14.q
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
--! qt:disabled:HIVE-24167
set hive.mapred.mode=nonstrict;
-- start query 1 in stream 0 using template query14.tpl and seed 1819994127
explain
Expand Down
Loading

0 comments on commit d85c239

Please sign in to comment.