[SPARK-33082][SPARK-20202][BUILD][SQL][FOLLOW-UP] Remove Hive 1.2 workarounds and Hive 1.2 profile in Jenkins script

### What changes were proposed in this pull request?

This PR removes the leftover Hive 1.2 workarounds and the Hive 1.2 profile in the Jenkins script.

- The `test-hive1.2` PR title tag is no longer used in Jenkins
- Remove some comments related to Hive 1.2
- Remove unused code related to Hive 1.2 in `OrcFilters.scala`
- Test the `spark.sql.hive.convertMetastoreOrc` disabled case for the tests added at SPARK-19809 and SPARK-22267 (a rough sketch of this kind of test follows below)
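
To illustrate the last item, here is a minimal sketch of how a test can exercise both settings of `spark.sql.hive.convertMetastoreOrc`. It is not the code added by this PR; the suite name, table, and assertion are hypothetical, and it assumes Spark's own test helpers (`QueryTest`, `SQLTestUtils`, `TestHiveSingleton`) with their `withSQLConf`/`withTable` utilities.

```scala
import org.apache.spark.sql.{QueryTest, Row}
import org.apache.spark.sql.hive.HiveUtils
import org.apache.spark.sql.hive.test.TestHiveSingleton
import org.apache.spark.sql.test.SQLTestUtils

// Hypothetical suite; the real checks live in Spark's existing ORC suites.
class ConvertMetastoreOrcToggleSuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
  test("same answer with spark.sql.hive.convertMetastoreOrc on and off") {
    Seq("true", "false").foreach { convert =>
      withSQLConf(HiveUtils.CONVERT_METASTORE_ORC.key -> convert) {
        withTable("orc_tbl") {
          sql("CREATE TABLE orc_tbl (id INT, name STRING) STORED AS ORC")
          sql("INSERT INTO orc_tbl VALUES (1, 'a')")
          // The answer must not depend on whether the metastore ORC table is
          // converted to Spark's native ORC data source or read via Hive's serde.
          checkAnswer(sql("SELECT id, name FROM orc_tbl"), Row(1, "a"))
        }
      }
    }
  }
}
```

Running the same assertion under both values catches regressions that only surface when the metastore ORC table is read through Hive's serde path instead of Spark's native ORC reader.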

### Why are the changes needed?

To remove unused code and improve test coverage.

### Does this PR introduce _any_ user-facing change?

No, dev-only.

### How was this patch tested?

Manually ran the unit tests. It will also be tested by CI in this PR.

Closes apache#29973 from HyukjinKwon/SPARK-33082-SPARK-20202.

Authored-by: HyukjinKwon <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
HyukjinKwon authored and dongjoon-hyun committed Oct 9, 2020
1 parent edb140e commit 2e07ed3
Showing 9 changed files with 13 additions and 970 deletions.
230 changes: 0 additions & 230 deletions dev/deps/spark-deps-hadoop-2.7-hive-1.2

This file was deleted.

2 changes: 0 additions & 2 deletions dev/run-tests-jenkins.py
@@ -175,8 +175,6 @@ def main():
if "test-hadoop3.2" in ghprb_pull_title:
os.environ["AMPLAB_JENKINS_BUILD_PROFILE"] = "hadoop3.2"
# Switch the Hive profile based on the PR title:
if "test-hive1.2" in ghprb_pull_title:
os.environ["AMPLAB_JENKINS_BUILD_HIVE_PROFILE"] = "hive1.2"
if "test-hive2.3" in ghprb_pull_title:
os.environ["AMPLAB_JENKINS_BUILD_HIVE_PROFILE"] = "hive2.3"

@@ -39,10 +39,6 @@ import org.apache.spark.sql.types._

 /**
  * A test suite that tests Apache ORC filter API based filter pushdown optimization.
- * OrcFilterSuite and HiveOrcFilterSuite is logically duplicated to provide the same test coverage.
- * The difference are the packages containing 'Predicate' and 'SearchArgument' classes.
- * - OrcFilterSuite uses 'org.apache.orc.storage.ql.io.sarg' package.
- * - HiveOrcFilterSuite uses 'org.apache.hadoop.hive.ql.io.sarg' package.
  */
 class OrcFilterSuite extends OrcTest with SharedSparkSession {
 
@@ -217,7 +217,6 @@ abstract class OrcQueryTest extends OrcTest {
     }
   }
 
-  // Hive supports zlib, snappy and none for Hive 1.2.1.
   test("Compression options for writing to an ORC file (SNAPPY, ZLIB and NONE)") {
     withTempPath { file =>
       spark.range(0, 10).write
@@ -46,7 +46,6 @@ import org.apache.spark.sql.internal.SQLConf.ORC_IMPLEMENTATION
  * -> OrcPartitionDiscoverySuite
  * -> HiveOrcPartitionDiscoverySuite
  * -> OrcFilterSuite
- * -> HiveOrcFilterSuite
  */
 abstract class OrcTest extends QueryTest with FileBasedDataSourceTest with BeforeAndAfterAll {
 
@@ -47,7 +47,7 @@ import org.apache.spark.sql.SparkSession
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.execution.datasources._
-import org.apache.spark.sql.execution.datasources.orc.OrcOptions
+import org.apache.spark.sql.execution.datasources.orc.{OrcFilters, OrcOptions}
 import org.apache.spark.sql.hive.{HiveInspectors, HiveShim}
 import org.apache.spark.sql.sources.{Filter, _}
 import org.apache.spark.sql.types._
@@ -139,7 +139,7 @@ class OrcFileFormat extends FileFormat with DataSourceRegister with Serializable

     if (sparkSession.sessionState.conf.orcFilterPushDown) {
       // Sets pushed predicates
-      OrcFilters.createFilter(requiredSchema, filters.toArray).foreach { f =>
+      OrcFilters.createFilter(requiredSchema, filters).foreach { f =>
         hadoopConf.set(OrcFileFormat.SARG_PUSHDOWN, toKryo(f))
         hadoopConf.setBoolean(ConfVars.HIVEOPTINDEXFILTER.varname, true)
       }
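
For context on the hunk above: the Hive-module `OrcFileFormat` now calls the shared `OrcFilters` from `org.apache.spark.sql.execution.datasources.orc` (see the import change earlier in this file), whose `createFilter` accepts the `Seq[Filter]` directly, so the `.toArray` conversion is dropped. The sketch below shows that call shape in isolation; it is illustrative only, the schema and predicates are made up, and since `OrcFilters` is Spark-internal such code would have to live inside Spark's own `sql` source tree.

```scala
// Hypothetical, Spark-internal sketch of the ORC filter-pushdown call.
package org.apache.spark.sql.execution.datasources.orc

import org.apache.spark.sql.sources.{EqualTo, Filter, GreaterThan}
import org.apache.spark.sql.types.{LongType, StringType, StructField, StructType}

object OrcFilterPushdownSketch {
  def main(args: Array[String]): Unit = {
    // Made-up read schema and pushed-down data source filters.
    val readSchema = StructType(Seq(
      StructField("id", LongType),
      StructField("name", StringType)))
    val pushed: Seq[Filter] = Seq(GreaterThan("id", 5L), EqualTo("name", "spark"))

    // createFilter yields Some(SearchArgument) when it can convert the
    // predicates (skipping any it cannot handle); None means no pushdown.
    OrcFilters.createFilter(readSchema, pushed).foreach { sarg =>
      println(s"ORC search argument to push down: $sarg")
    }
  }
}
```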
@@ -296,7 +296,7 @@ private[orc] class OrcOutputWriter(

   override def close(): Unit = {
     if (recordWriterInstantiated) {
-      // Hive 1.2.1 ORC initializes its private `writer` field at the first write.
+      // Hive ORC initializes its private `writer` field at the first write.
       OrcFileFormat.addSparkVersionMetadata(recordWriter)
       recordWriter.close(Reporter.NULL)
     }
