Skip to content

Commit

Permalink
[MINOR][SQL] Additional test case for CheckCartesianProducts rule
Browse files Browse the repository at this point in the history
## What changes were proposed in this pull request?

While discovering optimization rules and their test coverage, I did not find any tests for `CheckCartesianProducts` in the Catalyst folder. So, I decided to create a new test suite. Once I finished, I found a test in `JoinSuite` for this functionality so feel free to discard this change if it does not make much sense. The proposed test suite covers a few additional use cases.

Author: aokolnychyi <[email protected]>

Closes apache#18909 from aokolnychyi/check-cartesian-join-tests.
  • Loading branch information
aokolnychyi authored and gatorsmile committed Aug 14, 2017
1 parent c0e333d commit 5596ce8
Show file tree
Hide file tree
Showing 4 changed files with 136 additions and 2 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.sql.catalyst.optimizer

import org.scalatest.Matchers._

import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.expressions.Expression
import org.apache.spark.sql.catalyst.plans._
import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan}
import org.apache.spark.sql.catalyst.rules.RuleExecutor
import org.apache.spark.sql.internal.SQLConf.CROSS_JOINS_ENABLED

class CheckCartesianProductsSuite extends PlanTest {

object Optimize extends RuleExecutor[LogicalPlan] {
val batches = Batch("Check Cartesian Products", Once, CheckCartesianProducts) :: Nil
}

val testRelation1 = LocalRelation('a.int, 'b.int)
val testRelation2 = LocalRelation('c.int, 'd.int)

val joinTypesWithRequiredCondition = Seq(Inner, LeftOuter, RightOuter, FullOuter)
val joinTypesWithoutRequiredCondition = Seq(LeftSemi, LeftAnti, ExistenceJoin('exists))

test("CheckCartesianProducts doesn't throw an exception if cross joins are enabled)") {
withSQLConf(CROSS_JOINS_ENABLED.key -> "true") {
noException should be thrownBy {
for (joinType <- joinTypesWithRequiredCondition ++ joinTypesWithoutRequiredCondition) {
performCartesianProductCheck(joinType)
}
}
}
}

test("CheckCartesianProducts throws an exception for join types that require a join condition") {
withSQLConf(CROSS_JOINS_ENABLED.key -> "false") {
for (joinType <- joinTypesWithRequiredCondition) {
val thrownException = the [AnalysisException] thrownBy {
performCartesianProductCheck(joinType)
}
assert(thrownException.message.contains("Detected cartesian product"))
}
}
}

test("CheckCartesianProducts doesn't throw an exception if a join condition is present") {
withSQLConf(CROSS_JOINS_ENABLED.key -> "false") {
for (joinType <- joinTypesWithRequiredCondition) {
noException should be thrownBy {
performCartesianProductCheck(joinType, Some('a === 'd))
}
}
}
}

test("CheckCartesianProducts doesn't throw an exception if join types don't require conditions") {
withSQLConf(CROSS_JOINS_ENABLED.key -> "false") {
for (joinType <- joinTypesWithoutRequiredCondition) {
noException should be thrownBy {
performCartesianProductCheck(joinType)
}
}
}
}

private def performCartesianProductCheck(
joinType: JoinType,
condition: Option[Expression] = None): Unit = {
val analyzedPlan = testRelation1.join(testRelation2, joinType, condition).analyze
val optimizedPlan = Optimize.execute(analyzedPlan)
comparePlans(analyzedPlan, optimizedPlan)
}
}
3 changes: 2 additions & 1 deletion sql/core/src/test/resources/sql-tests/inputs/cross-join.sql
Original file line number Diff line number Diff line change
Expand Up @@ -32,4 +32,5 @@ create temporary view D(d, vd) as select * from nt1;

-- Allowed since cross join with C is explicit
select * from ((A join B on (a = b)) cross join C) join D on (a = d);

-- Cross joins with non-equal predicates
SELECT * FROM nt1 CROSS JOIN nt2 ON (nt1.k > nt2.k);
12 changes: 11 additions & 1 deletion sql/core/src/test/resources/sql-tests/results/cross-join.sql.out
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
-- Number of queries: 12
-- Number of queries: 13


-- !query 0
Expand Down Expand Up @@ -127,3 +127,13 @@ three 3 three 3 two 2 three 3
two 2 two 2 one 1 two 2
two 2 two 2 three 3 two 2
two 2 two 2 two 2 two 2

-- !query 12
SELECT * FROM nt1 CROSS JOIN nt2 ON (nt1.k > nt2.k)
-- !query 12 schema
struct<k:string,v1:int,k:string,v2:int>
-- !query 12 output
three 3 one 1
three 3 one 5
two 2 one 1
two 2 one 5
32 changes: 32 additions & 0 deletions sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,9 @@ class JoinSuite extends QueryTest with SharedSQLContext {
Row(1, null, 2, 2) ::
Row(2, 2, 1, null) ::
Row(2, 2, 2, 2) :: Nil)
checkAnswer(
testData3.as("x").join(testData3.as("y"), $"x.a" > $"y.a"),
Row(2, 2, 1, null) :: Nil)
}
withSQLConf(SQLConf.CROSS_JOINS_ENABLED.key -> "false") {
val e = intercept[Exception] {
Expand Down Expand Up @@ -604,6 +607,35 @@ class JoinSuite extends QueryTest with SharedSQLContext {
}

cartesianQueries.foreach(checkCartesianDetection)

// Check that left_semi, left_anti, existence joins without conditions do not throw
// an exception if cross joins are disabled
withSQLConf(SQLConf.CROSS_JOINS_ENABLED.key -> "false") {
checkAnswer(
sql("SELECT * FROM testData3 LEFT SEMI JOIN testData2"),
Row(1, null) :: Row (2, 2) :: Nil)
checkAnswer(
sql("SELECT * FROM testData3 LEFT ANTI JOIN testData2"),
Nil)
checkAnswer(
sql(
"""
|SELECT a FROM testData3
|WHERE
| EXISTS (SELECT * FROM testData)
|OR
| EXISTS (SELECT * FROM testData2)""".stripMargin),
Row(1) :: Row(2) :: Nil)
checkAnswer(
sql(
"""
|SELECT key FROM testData
|WHERE
| key IN (SELECT a FROM testData2)
|OR
| key IN (SELECT a FROM testData3)""".stripMargin),
Row(1) :: Row(2) :: Row(3) :: Nil)
}
}

test("test SortMergeJoin (without spill)") {
Expand Down

0 comments on commit 5596ce8

Please sign in to comment.