[SPARK-35878][CORE] Revert S3A endpoint fixup logic of SPARK-35878
### What changes were proposed in this pull request?

Revert [SPARK-35878][CORE] Add fs.s3a.endpoint if unset and fs.s3a.endpoint.region is null

Removing the region/endpoint patching code of SPARK-35878 avoids
authentication problems with versions of the S3A connector
built with the AWS v2 SDK, as is the case in Hadoop 3.4.0.

That is: if fs.s3a.endpoint is unset it will stay unset.
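A minimal sketch of the post-revert behaviour, written in the style of the
existing SparkHadoopUtilSuite tests (assertConfigValue is that suite's helper,
and appendSparkHadoopConfigs is private[spark], so the snippet is assumed to
run inside the suite):

    import org.apache.hadoop.conf.Configuration
    import org.apache.spark.SparkConf
    import org.apache.spark.deploy.SparkHadoopUtil

    // Sketch only: with the patching code removed, an unset endpoint stays unset.
    val sc = new SparkConf()
    val hadoopConf = new Configuration(false)
    new SparkHadoopUtil().appendSparkHadoopConfigs(sc, hadoopConf)
    assertConfigValue(hadoopConf, "fs.s3a.endpoint", null)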

The v2 SDK binds to AWS services differently, in what can be
described as "region first" binding.
Having Spark set the endpoint blocks S3 Express support
and is incompatible with
HADOOP-18975 (S3A: Add option fs.s3a.endpoint.fips to use AWS FIPS endpoints).
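As an illustration of "region first" binding (a sketch, not part of this
change; the region value is only an example), a job can bind by region and,
with HADOOP-18975, opt into FIPS endpoints, without naming an endpoint at all:

    import org.apache.spark.SparkConf

    // Example only: set a region, optionally FIPS, and leave fs.s3a.endpoint unset.
    val conf = new SparkConf()
      .set("spark.hadoop.fs.s3a.endpoint.region", "eu-west-1")
      .set("spark.hadoop.fs.s3a.endpoint.fips", "true")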

The change is compatible with all releases of the S3A connector other than
Hadoop 3.3.1 binaries deployed outside EC2 and without the endpoint
explicitly set.
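For that one affected case, a user-side workaround is to do manually what the
removed code did automatically and name the endpoint explicitly (a sketch,
assuming the same central endpoint the old fixup chose):

    import org.apache.spark.SparkConf

    // Manual equivalent of the removed fixup, for Hadoop 3.3.1 outside EC2:
    val conf = new SparkConf()
      .set("spark.hadoop.fs.s3a.endpoint", "s3.amazonaws.com")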

Change-Id: I59c51db8b8280a907fdd11131e527e7014cdefc3
steveloughran committed Jan 22, 2024
1 parent 667c0a9 commit 1a0e9c0
Showing 2 changed files with 0 additions and 43 deletions.
10 changes: 0 additions & 10 deletions core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala
@@ -529,16 +529,6 @@ private[spark] object SparkHadoopUtil extends Logging {
if (conf.getOption("spark.hadoop.fs.s3a.downgrade.syncable.exceptions").isEmpty) {
hadoopConf.set("fs.s3a.downgrade.syncable.exceptions", "true", setBySpark)
}
// In Hadoop 3.3.1, AWS region handling with the default "" endpoint only works
// in EC2 deployments or when the AWS CLI is installed.
// The workaround is to set the name of the S3 endpoint explicitly,
// if not already set. See HADOOP-17771.
if (hadoopConf.get("fs.s3a.endpoint", "").isEmpty &&
hadoopConf.get("fs.s3a.endpoint.region") == null) {
// set to US central endpoint which can also connect to buckets
// in other regions at the expense of a HEAD request during fs creation
hadoopConf.set("fs.s3a.endpoint", "s3.amazonaws.com", setBySpark)
}
}

private def appendSparkHiveConfigs(conf: SparkConf, hadoopConf: Configuration): Unit = {
33 changes: 0 additions & 33 deletions core/src/test/scala/org/apache/spark/deploy/SparkHadoopUtilSuite.scala
@@ -39,19 +39,6 @@ class SparkHadoopUtilSuite extends SparkFunSuite {
assertConfigMatches(hadoopConf, "orc.filterPushdown", "true", SOURCE_SPARK_HADOOP)
assertConfigMatches(hadoopConf, "fs.s3a.downgrade.syncable.exceptions", "true",
SET_TO_DEFAULT_VALUES)
assertConfigMatches(hadoopConf, "fs.s3a.endpoint", "s3.amazonaws.com", SET_TO_DEFAULT_VALUES)
}

/**
* An empty S3A endpoint will be overridden just as a null value
* would.
*/
test("appendSparkHadoopConfigs with S3A endpoint set to empty string") {
val sc = new SparkConf()
val hadoopConf = new Configuration(false)
sc.set("spark.hadoop.fs.s3a.endpoint", "")
new SparkHadoopUtil().appendSparkHadoopConfigs(sc, hadoopConf)
assertConfigMatches(hadoopConf, "fs.s3a.endpoint", "s3.amazonaws.com", SET_TO_DEFAULT_VALUES)
}

/**
@@ -61,28 +48,8 @@
val sc = new SparkConf()
val hadoopConf = new Configuration(false)
sc.set("spark.hadoop.fs.s3a.downgrade.syncable.exceptions", "false")
sc.set("spark.hadoop.fs.s3a.endpoint", "s3-eu-west-1.amazonaws.com")
new SparkHadoopUtil().appendSparkHadoopConfigs(sc, hadoopConf)
assertConfigValue(hadoopConf, "fs.s3a.downgrade.syncable.exceptions", "false")
assertConfigValue(hadoopConf, "fs.s3a.endpoint",
"s3-eu-west-1.amazonaws.com")
}

/**
* If the endpoint region is set (even to a blank string) in
* "spark.hadoop.fs.s3a.endpoint.region" then the endpoint is not set,
* even when the s3a endpoint is "".
* This supports a feature in hadoop 3.3.1 where this configuration
* pair triggers a revert to the "SDK to work out the region" algorithm,
* which works on EC2 deployments.
*/
test("appendSparkHadoopConfigs with S3A endpoint region set to an empty string") {
val sc = new SparkConf()
val hadoopConf = new Configuration(false)
sc.set("spark.hadoop.fs.s3a.endpoint.region", "")
new SparkHadoopUtil().appendSparkHadoopConfigs(sc, hadoopConf)
// the endpoint value will not have been set
assertConfigValue(hadoopConf, "fs.s3a.endpoint", null)
}

/**
