From 60ec2922611693179204dd4929e6235d4b9947b3 Mon Sep 17 00:00:00 2001
From: Yufei Gu
Date: Thu, 22 Aug 2024 08:42:30 -0700
Subject: [PATCH] Update Spark version for regression tests (#168)

Co-authored-by: Yufei Gu
---
 regtests/README.md        |  7 -------
 regtests/run.sh           |  5 ++++-
 regtests/run_spark_sql.sh |  5 ++++-
 regtests/setup.sh         | 18 +++++++++++-------
 4 files changed, 19 insertions(+), 16 deletions(-)

diff --git a/regtests/README.md b/regtests/README.md
index 590201ffa..d4c8b92c6 100644
--- a/regtests/README.md
+++ b/regtests/README.md
@@ -74,13 +74,6 @@ AZURE_BLOB_TEST_BASE=abfss://@.blob.core.w
 into the `credentials` folder. Then specify the name of the file in your .env file - do not change the
 path, as `/tmp/credentials` is the folder on the container where the credentials file will be mounted.
 
-## Setup without running tests
-
-Setup is idempotent.
-
-```
-./setup.sh
-```
 
 ## Experiment with failed test
 
diff --git a/regtests/run.sh b/regtests/run.sh
index eef12b622..aa5e9ebfe 100755
--- a/regtests/run.sh
+++ b/regtests/run.sh
@@ -16,8 +16,11 @@
 #
 # Run without args to run all tests, or single arg for single test.
 
+export SPARK_VERSION=spark-3.5.2
+export SPARK_DISTRIBUTION=${SPARK_VERSION}-bin-hadoop3-scala2.13
+
 if [ -z "${SPARK_HOME}"]; then
-  export SPARK_HOME=$(realpath ~/spark-3.5.1-bin-hadoop3-scala2.13)
+  export SPARK_HOME=$(realpath ~/${SPARK_DISTRIBUTION})
 fi
 export PYTHONPATH="${SPARK_HOME}/python/:${SPARK_HOME}/python/lib/py4j-0.10.9.7-src.zip:$PYTHONPATH"
 
diff --git a/regtests/run_spark_sql.sh b/regtests/run_spark_sql.sh
index 5451e2bd9..ba0c05a77 100755
--- a/regtests/run_spark_sql.sh
+++ b/regtests/run_spark_sql.sh
@@ -22,10 +22,13 @@
 REGTEST_HOME=$(dirname $(realpath $0))
 cd ${REGTEST_HOME}
 
+export SPARK_VERSION=spark-3.5.2
+export SPARK_DISTRIBUTION=${SPARK_VERSION}-bin-hadoop3-scala2.13
+
 ./setup.sh
 
 if [ -z "${SPARK_HOME}"]; then
-  export SPARK_HOME=$(realpath ~/spark-3.5.1-bin-hadoop3-scala2.13)
+  export SPARK_HOME=$(realpath ~/${SPARK_DISTRIBUTION})
 fi
 
 SPARK_BEARER_TOKEN="${REGTEST_ROOT_BEARER_TOKEN:-principal:root;realm:default-realm}"
diff --git a/regtests/setup.sh b/regtests/setup.sh
index e28a6fe05..039725f92 100755
--- a/regtests/setup.sh
+++ b/regtests/setup.sh
@@ -24,7 +24,7 @@ set -x
 SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
 
 if [ -z "${SPARK_HOME}" ]; then
-  SPARK_HOME=$(realpath ~/spark-3.5.1-bin-hadoop3-scala2.13)
+  SPARK_HOME=$(realpath ~/${SPARK_DISTRIBUTION})
 fi
 SPARK_CONF="${SPARK_HOME}/conf/spark-defaults.conf"
 export PYTHONPATH="${SPARK_HOME}/python/:${SPARK_HOME}/python/lib/py4j-0.10.9.7-src.zip:$PYTHONPATH"
@@ -33,22 +33,26 @@ export PYTHONPATH="${SPARK_HOME}/python/:${SPARK_HOME}/python/lib/py4j-0.10.9.7-
 echo 'Verifying Spark binaries...'
 if ! [ -f ${SPARK_HOME}/bin/spark-sql ]; then
   echo 'Setting up Spark...'
-  if ! [ -f ~/spark-3.5.1-bin-hadoop3-scala2.13.tgz ]; then
+  if [ -z "${SPARK_VERSION}" ] || [ -z "${SPARK_DISTRIBUTION}" ]; then
+    echo 'SPARK_VERSION or SPARK_DISTRIBUTION not set. Please set SPARK_VERSION and SPARK_DISTRIBUTION to the desired version.'
+    exit 1
+  fi
+  if ! [ -f ~/${SPARK_DISTRIBUTION}.tgz ]; then
     echo 'Downloading spark distro...'
-    wget -O ~/spark-3.5.1-bin-hadoop3-scala2.13.tgz https://dlcdn.apache.org/spark/spark-3.5.1/spark-3.5.1-bin-hadoop3-scala2.13.tgz
-    if ! [ -f ~/spark-3.5.1-bin-hadoop3-scala2.13.tgz ]; then
+    wget -O ~/${SPARK_DISTRIBUTION}.tgz https://dlcdn.apache.org/spark/${SPARK_VERSION}/${SPARK_DISTRIBUTION}.tgz
+    if ! [ -f ~/${SPARK_DISTRIBUTION}.tgz ]; then
       if [[ "${OSTYPE}" == "darwin"* ]]; then
         echo "Detected OS: mac. Running 'brew install wget' to try again."
         brew install wget
-        wget -O ~/spark-3.5.1-bin-hadoop3-scala2.13.tgz https://dlcdn.apache.org/spark/spark-3.5.1/spark-3.5.1-bin-hadoop3-scala2.13.tgz
+        wget -O ~/${SPARK_DISTRIBUTION}.tgz https://dlcdn.apache.org/spark/${SPARK_VERSION}/${SPARK_DISTRIBUTION}.tgz
       fi
     fi
   else
     echo 'Found existing Spark tarball'
   fi
-  tar xzvf ~/spark-3.5.1-bin-hadoop3-scala2.13.tgz -C ~
+  tar xzvf ~/${SPARK_DISTRIBUTION}.tgz -C ~
   echo 'Done!'
-  SPARK_HOME=$(realpath ~/spark-3.5.1-bin-hadoop3-scala2.13)
+  SPARK_HOME=$(realpath ~/${SPARK_DISTRIBUTION})
   SPARK_CONF="${SPARK_HOME}/conf/spark-defaults.conf"
 else
   echo 'Verified Spark distro already installed.'